{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0c311dc3",
   "metadata": {},
   "source": [
    "# Use Categorical DQN to Play Pong\n",
    "\n",
    "TensorFlow version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "43ee70df",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import copy\n",
    "import logging\n",
    "import itertools\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "from gym.wrappers.atari_preprocessing import AtariPreprocessing\n",
    "from gym.wrappers.frame_stack import FrameStack\n",
    "import matplotlib.pyplot as plt\n",
    "import tensorflow.compat.v2 as tf\n",
    "tf.random.set_seed(0)\n",
    "from tensorflow import keras\n",
    "from tensorflow import nn\n",
    "from tensorflow import optimizers\n",
    "from tensorflow import losses\n",
    "from tensorflow.keras import layers\n",
    "from tensorflow.keras import models\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eb3b026c",
   "metadata": {},
   "source": [
    "Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4abedbed",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:34:27 [INFO] env: <AtariPreprocessing<TimeLimit<AtariEnv<PongNoFrameskip-v4>>>>\n",
      "00:34:27 [INFO] action_space: Discrete(6)\n",
      "00:34:27 [INFO] observation_space: Box(0, 255, (4, 84, 84), uint8)\n",
      "00:34:27 [INFO] reward_range: (-inf, inf)\n",
      "00:34:27 [INFO] metadata: {'render.modes': ['human', 'rgb_array']}\n",
      "00:34:27 [INFO] num_stack: 4\n",
      "00:34:27 [INFO] lz4_compress: False\n",
      "00:34:27 [INFO] frames: deque([], maxlen=4)\n",
      "00:34:27 [INFO] id: PongNoFrameskip-v4\n",
      "00:34:27 [INFO] entry_point: gym.envs.atari:AtariEnv\n",
      "00:34:27 [INFO] reward_threshold: None\n",
      "00:34:27 [INFO] nondeterministic: False\n",
      "00:34:27 [INFO] max_episode_steps: 400000\n",
      "00:34:27 [INFO] _kwargs: {'game': 'pong', 'obs_type': 'image', 'frameskip': 1}\n",
      "00:34:27 [INFO] _env_name: PongNoFrameskip\n"
     ]
    }
   ],
   "source": [
    "env = FrameStack(AtariPreprocessing(gym.make('PongNoFrameskip-v4')),\n",
    "        num_stack=4)\n",
    "for key in vars(env):\n",
    "    logging.info('%s: %s', key, vars(env)[key])\n",
    "for key in vars(env.spec):\n",
    "    logging.info('%s: %s', key, vars(env.spec)[key])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "192626a0",
   "metadata": {},
   "source": [
    "Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "47f918f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNReplayer:\n",
    "    def __init__(self, capacity):\n",
    "        self.memory = pd.DataFrame(index=range(capacity),\n",
    "                columns=['state', 'action', 'reward', 'next_state', 'done'])\n",
    "        self.i = 0\n",
    "        self.count = 0\n",
    "        self.capacity = capacity\n",
    "\n",
    "    def store(self, *args):\n",
    "        self.memory.loc[self.i] = args\n",
    "        self.i = (self.i + 1) % self.capacity\n",
    "        self.count = min(self.count + 1, self.capacity)\n",
    "\n",
    "    def sample(self, size):\n",
    "        indices = np.random.choice(self.count, size=size)\n",
    "        return (np.stack(self.memory.loc[indices, field]) for field in\n",
    "                self.memory.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a2b9a656",
   "metadata": {},
   "outputs": [],
   "source": [
    "class CategoricalDQNAgent:\n",
    "    def __init__(self, env):\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = 0.99\n",
    "        self.epsilon = 1. # exploration\n",
    "        \n",
    "        self.replayer = DQNReplayer(capacity=100000)\n",
    "\n",
    "        atom_count = 51\n",
    "        self.atom_min = -10.\n",
    "        self.atom_max = 10.\n",
    "        self.atom_difference = (self.atom_max - self.atom_min) / (atom_count - 1)\n",
    "        self.atom_tensor = tf.linspace(self.atom_min, self.atom_max, atom_count)\n",
    "\n",
    "        self.evaluate_net = self.build_net(self.action_n, atom_count)\n",
    "        self.target_net = models.clone_model(self.evaluate_net)\n",
    "\n",
    "    def build_net(self, action_n, atom_count):\n",
    "        net = keras.Sequential([\n",
    "                keras.layers.Permute((2, 3, 1), input_shape=(4, 84, 84)),\n",
    "                layers.Conv2D(32, kernel_size=8, strides=4, activation=nn.relu),\n",
    "                layers.Conv2D(64, kernel_size=4, strides=2, activation=nn.relu),\n",
    "                layers.Conv2D(64, kernel_size=3, strides=1, activation=nn.relu),\n",
    "                layers.Flatten(),\n",
    "                layers.Dense(512, activation=nn.relu),\n",
    "                layers.Dense(action_n * atom_count),\n",
    "                layers.Reshape((action_n, atom_count)), layers.Softmax()])\n",
    "        optimizer = optimizers.Adam(0.0001)\n",
    "        net.compile(loss=losses.mse, optimizer=optimizer)\n",
    "        return net\n",
    "        \n",
    "    def reset(self, mode=None):\n",
    "        self.mode = mode\n",
    "        if mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        state_tensor = tf.convert_to_tensor(np.array(observation)[np.newaxis],\n",
    "                dtype=tf.float32)\n",
    "        prob_tensor = self.evaluate_net(state_tensor)\n",
    "        q_component_tensor = prob_tensor * self.atom_tensor\n",
    "        q_tensor = tf.reduce_mean(q_component_tensor, axis=2)\n",
    "        action_tensor = tf.math.argmax(q_tensor, axis=1)\n",
    "        actions = action_tensor.numpy()\n",
    "        action = actions[0]\n",
    "        if self.mode == 'train':\n",
    "            if np.random.rand() < self.epsilon:\n",
    "                action = np.random.randint(0, self.action_n)\n",
    "            \n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "            if len(self.trajectory) >= 8:\n",
    "                state, _, _, act, next_state, reward, done, _ = \\\n",
    "                        self.trajectory[-8:]\n",
    "                self.replayer.store(state, act, reward, next_state, done)\n",
    "            if self.replayer.count >= 1024 and self.replayer.count % 10 == 0:\n",
    "                self.learn()\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        pass\n",
    "\n",
    "    def update_net(self, target_net, evaluate_net, learning_rate=0.005):\n",
    "        average_weights = [(1. - learning_rate) * t + learning_rate * e for t, e\n",
    "                in zip(target_net.get_weights(), evaluate_net.get_weights())]\n",
    "        target_net.set_weights(average_weights)\n",
    "\n",
    "    def learn(self):\n",
    "        # replay\n",
    "        batch_size = 32\n",
    "        states, actions, rewards, next_states, dones = \\\n",
    "                self.replayer.sample(batch_size)\n",
    "        state_tensor = tf.convert_to_tensor(states, dtype=tf.float32)\n",
    "        reward_tensor = tf.convert_to_tensor(rewards[:, np.newaxis],\n",
    "                dtype=tf.float32)\n",
    "        done_tensor = tf.convert_to_tensor(dones[:, np.newaxis],\n",
    "                dtype=tf.float32)\n",
    "        next_state_tensor = tf.convert_to_tensor(next_states, dtype=tf.float32)\n",
    "\n",
    "        # compute target\n",
    "        next_prob_tensor = self.target_net(next_state_tensor)\n",
    "        next_q_tensor = tf.reduce_sum(next_prob_tensor * self.atom_tensor,\n",
    "                axis=2)\n",
    "        next_action_tensor = tf.math.argmax(next_q_tensor, axis=1)\n",
    "        next_actions = next_action_tensor.numpy()\n",
    "        indices = [[idx, next_action] for idx, next_action in\n",
    "                enumerate(next_actions)]\n",
    "        next_dist_tensor = tf.gather_nd(next_prob_tensor, indices)\n",
    "        next_dist_tensor = tf.reshape(next_dist_tensor,\n",
    "                shape=(batch_size, 1, -1))\n",
    "\n",
    "        # project\n",
    "        target_tensor = reward_tensor + self.gamma * tf.reshape(\n",
    "                self.atom_tensor, (1, -1)) * (1. - done_tensor) # broadcast\n",
    "        clipped_target_tensor = tf.clip_by_value(target_tensor,\n",
    "                self.atom_min, self.atom_max)\n",
    "        projection_tensor = tf.clip_by_value(1. - tf.math.abs(\n",
    "                clipped_target_tensor[:, np.newaxis, ...]\n",
    "                - tf.reshape(self.atom_tensor, shape=(1, -1, 1)))\n",
    "                / self.atom_difference, 0, 1)\n",
    "        projected_tensor = tf.reduce_sum(projection_tensor * next_dist_tensor,\n",
    "                axis=-1)\n",
    "\n",
    "        with tf.GradientTape() as tape:\n",
    "            all_q_prob_tensor = self.evaluate_net(state_tensor)\n",
    "            indices = [[idx, action] for idx, action in enumerate(actions)]\n",
    "            q_prob_tensor = tf.gather_nd(all_q_prob_tensor, indices)\n",
    "\n",
    "            cross_entropy_tensor = -tf.reduce_sum(\n",
    "                    tf.math.xlogy(projected_tensor, q_prob_tensor\n",
    "                    + 1e-8))\n",
    "            loss_tensor = tf.reduce_mean(cross_entropy_tensor)\n",
    "        grads = tape.gradient(loss_tensor, self.evaluate_net.variables)\n",
    "        self.evaluate_net.optimizer.apply_gradients(\n",
    "                zip(grads, self.evaluate_net.variables))\n",
    "\n",
    "        self.update_net(self.target_net, self.evaluate_net)\n",
    "\n",
    "        self.epsilon = max(self.epsilon - 1e-5, 0.05)\n",
    "\n",
    "\n",
    "\n",
    "agent = CategoricalDQNAgent(env)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "165b9ffb",
   "metadata": {},
   "source": [
    "Train & Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "50e21fe7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:34:30 [INFO] ==== train ====\n",
      "00:34:44 [DEBUG] train episode 0: reward = -20.00, steps = 935\n",
      "00:35:13 [DEBUG] train episode 1: reward = -19.00, steps = 1009\n",
      "00:35:36 [DEBUG] train episode 2: reward = -21.00, steps = 757\n",
      "00:36:04 [DEBUG] train episode 3: reward = -19.00, steps = 936\n",
      "00:36:29 [DEBUG] train episode 4: reward = -21.00, steps = 851\n",
      "00:36:56 [DEBUG] train episode 5: reward = -21.00, steps = 871\n",
      "00:37:23 [DEBUG] train episode 6: reward = -20.00, steps = 896\n",
      "00:37:55 [DEBUG] train episode 7: reward = -21.00, steps = 1064\n",
      "00:38:21 [DEBUG] train episode 8: reward = -21.00, steps = 861\n",
      "00:38:50 [DEBUG] train episode 9: reward = -19.00, steps = 961\n",
      "00:39:16 [DEBUG] train episode 10: reward = -21.00, steps = 881\n",
      "00:39:42 [DEBUG] train episode 11: reward = -21.00, steps = 853\n",
      "00:40:12 [DEBUG] train episode 12: reward = -21.00, steps = 985\n",
      "00:40:40 [DEBUG] train episode 13: reward = -19.00, steps = 933\n",
      "00:41:06 [DEBUG] train episode 14: reward = -20.00, steps = 843\n",
      "00:41:29 [DEBUG] train episode 15: reward = -21.00, steps = 762\n",
      "00:41:57 [DEBUG] train episode 16: reward = -20.00, steps = 926\n",
      "00:42:29 [DEBUG] train episode 17: reward = -20.00, steps = 1078\n",
      "00:42:59 [DEBUG] train episode 18: reward = -21.00, steps = 971\n",
      "00:43:37 [DEBUG] train episode 19: reward = -18.00, steps = 1252\n",
      "00:44:02 [DEBUG] train episode 20: reward = -21.00, steps = 820\n",
      "00:44:30 [DEBUG] train episode 21: reward = -21.00, steps = 926\n",
      "00:44:58 [DEBUG] train episode 22: reward = -20.00, steps = 942\n",
      "00:45:23 [DEBUG] train episode 23: reward = -21.00, steps = 824\n",
      "00:45:54 [DEBUG] train episode 24: reward = -20.00, steps = 1013\n",
      "00:46:20 [DEBUG] train episode 25: reward = -21.00, steps = 845\n",
      "00:46:48 [DEBUG] train episode 26: reward = -20.00, steps = 947\n",
      "00:47:16 [DEBUG] train episode 27: reward = -20.00, steps = 926\n",
      "00:47:48 [DEBUG] train episode 28: reward = -20.00, steps = 1016\n",
      "00:48:17 [DEBUG] train episode 29: reward = -21.00, steps = 941\n",
      "00:48:50 [DEBUG] train episode 30: reward = -19.00, steps = 1108\n",
      "00:49:16 [DEBUG] train episode 31: reward = -21.00, steps = 837\n",
      "00:49:43 [DEBUG] train episode 32: reward = -20.00, steps = 883\n",
      "00:50:07 [DEBUG] train episode 33: reward = -21.00, steps = 779\n",
      "00:50:32 [DEBUG] train episode 34: reward = -20.00, steps = 836\n",
      "00:51:01 [DEBUG] train episode 35: reward = -20.00, steps = 914\n",
      "00:51:28 [DEBUG] train episode 36: reward = -21.00, steps = 908\n",
      "00:51:58 [DEBUG] train episode 37: reward = -20.00, steps = 961\n",
      "00:52:23 [DEBUG] train episode 38: reward = -20.00, steps = 837\n",
      "00:52:54 [DEBUG] train episode 39: reward = -19.00, steps = 1008\n",
      "00:53:21 [DEBUG] train episode 40: reward = -21.00, steps = 882\n",
      "00:53:44 [DEBUG] train episode 41: reward = -21.00, steps = 760\n",
      "00:54:09 [DEBUG] train episode 42: reward = -21.00, steps = 790\n",
      "00:54:35 [DEBUG] train episode 43: reward = -21.00, steps = 859\n",
      "00:54:58 [DEBUG] train episode 44: reward = -21.00, steps = 763\n",
      "00:55:22 [DEBUG] train episode 45: reward = -21.00, steps = 776\n",
      "00:55:48 [DEBUG] train episode 46: reward = -21.00, steps = 864\n",
      "00:56:14 [DEBUG] train episode 47: reward = -21.00, steps = 851\n",
      "00:56:47 [DEBUG] train episode 48: reward = -20.00, steps = 1077\n",
      "00:57:17 [DEBUG] train episode 49: reward = -21.00, steps = 966\n",
      "00:57:44 [DEBUG] train episode 50: reward = -20.00, steps = 895\n",
      "00:58:13 [DEBUG] train episode 51: reward = -20.00, steps = 937\n",
      "00:58:38 [DEBUG] train episode 52: reward = -21.00, steps = 821\n",
      "00:59:08 [DEBUG] train episode 53: reward = -21.00, steps = 977\n",
      "00:59:36 [DEBUG] train episode 54: reward = -21.00, steps = 909\n",
      "01:00:07 [DEBUG] train episode 55: reward = -20.00, steps = 975\n",
      "01:00:35 [DEBUG] train episode 56: reward = -20.00, steps = 899\n",
      "01:01:01 [DEBUG] train episode 57: reward = -21.00, steps = 851\n",
      "01:01:35 [DEBUG] train episode 58: reward = -19.00, steps = 1099\n",
      "01:02:02 [DEBUG] train episode 59: reward = -21.00, steps = 866\n",
      "01:02:32 [DEBUG] train episode 60: reward = -20.00, steps = 971\n",
      "01:03:01 [DEBUG] train episode 61: reward = -20.00, steps = 928\n",
      "01:03:35 [DEBUG] train episode 62: reward = -20.00, steps = 1085\n",
      "01:04:04 [DEBUG] train episode 63: reward = -21.00, steps = 916\n",
      "01:04:30 [DEBUG] train episode 64: reward = -21.00, steps = 861\n",
      "01:05:01 [DEBUG] train episode 65: reward = -20.00, steps = 984\n",
      "01:05:32 [DEBUG] train episode 66: reward = -21.00, steps = 972\n",
      "01:06:04 [DEBUG] train episode 67: reward = -19.00, steps = 1034\n",
      "01:06:33 [DEBUG] train episode 68: reward = -20.00, steps = 897\n",
      "01:07:00 [DEBUG] train episode 69: reward = -20.00, steps = 866\n",
      "01:07:29 [DEBUG] train episode 70: reward = -21.00, steps = 940\n",
      "01:07:57 [DEBUG] train episode 71: reward = -20.00, steps = 863\n",
      "01:08:27 [DEBUG] train episode 72: reward = -19.00, steps = 965\n",
      "01:08:54 [DEBUG] train episode 73: reward = -20.00, steps = 855\n",
      "01:09:24 [DEBUG] train episode 74: reward = -20.00, steps = 919\n",
      "01:09:58 [DEBUG] train episode 75: reward = -19.00, steps = 1063\n",
      "01:10:36 [DEBUG] train episode 76: reward = -18.00, steps = 1188\n",
      "01:11:02 [DEBUG] train episode 77: reward = -21.00, steps = 824\n",
      "01:11:33 [DEBUG] train episode 78: reward = -19.00, steps = 1002\n",
      "01:12:04 [DEBUG] train episode 79: reward = -20.00, steps = 944\n",
      "01:12:36 [DEBUG] train episode 80: reward = -19.00, steps = 1004\n",
      "01:13:03 [DEBUG] train episode 81: reward = -21.00, steps = 854\n",
      "01:13:36 [DEBUG] train episode 82: reward = -19.00, steps = 1012\n",
      "01:14:03 [DEBUG] train episode 83: reward = -21.00, steps = 821\n",
      "01:14:31 [DEBUG] train episode 84: reward = -20.00, steps = 893\n",
      "01:15:04 [DEBUG] train episode 85: reward = -20.00, steps = 1023\n",
      "01:15:32 [DEBUG] train episode 86: reward = -21.00, steps = 868\n",
      "01:16:05 [DEBUG] train episode 87: reward = -21.00, steps = 1005\n",
      "01:16:41 [DEBUG] train episode 88: reward = -19.00, steps = 1116\n",
      "01:17:13 [DEBUG] train episode 89: reward = -19.00, steps = 998\n",
      "01:17:48 [DEBUG] train episode 90: reward = -19.00, steps = 1078\n",
      "01:18:14 [DEBUG] train episode 91: reward = -21.00, steps = 805\n",
      "01:18:43 [DEBUG] train episode 92: reward = -20.00, steps = 863\n",
      "01:19:07 [DEBUG] train episode 93: reward = -21.00, steps = 764\n",
      "01:19:33 [DEBUG] train episode 94: reward = -21.00, steps = 809\n",
      "01:19:58 [DEBUG] train episode 95: reward = -21.00, steps = 760\n",
      "01:20:27 [DEBUG] train episode 96: reward = -21.00, steps = 909\n",
      "01:20:55 [DEBUG] train episode 97: reward = -21.00, steps = 877\n",
      "01:21:21 [DEBUG] train episode 98: reward = -21.00, steps = 789\n",
      "01:21:56 [DEBUG] train episode 99: reward = -20.00, steps = 1086\n",
      "01:22:21 [DEBUG] train episode 100: reward = -21.00, steps = 758\n",
      "01:22:45 [DEBUG] train episode 101: reward = -21.00, steps = 763\n",
      "01:23:17 [DEBUG] train episode 102: reward = -19.00, steps = 980\n",
      "01:23:52 [DEBUG] train episode 103: reward = -19.00, steps = 1093\n",
      "01:24:21 [DEBUG] train episode 104: reward = -21.00, steps = 879\n",
      "01:24:53 [DEBUG] train episode 105: reward = -20.00, steps = 1002\n",
      "01:25:22 [DEBUG] train episode 106: reward = -21.00, steps = 905\n",
      "01:25:51 [DEBUG] train episode 107: reward = -21.00, steps = 885\n",
      "01:26:44 [DEBUG] train episode 108: reward = -21.00, steps = 810\n",
      "01:29:29 [DEBUG] train episode 109: reward = -21.00, steps = 970\n",
      "01:31:38 [DEBUG] train episode 110: reward = -21.00, steps = 760\n",
      "01:33:58 [DEBUG] train episode 111: reward = -21.00, steps = 818\n",
      "01:36:46 [DEBUG] train episode 112: reward = -20.00, steps = 986\n",
      "01:39:33 [DEBUG] train episode 113: reward = -20.00, steps = 985\n",
      "01:42:11 [DEBUG] train episode 114: reward = -19.00, steps = 929\n",
      "01:44:52 [DEBUG] train episode 115: reward = -20.00, steps = 945\n",
      "01:47:35 [DEBUG] train episode 116: reward = -20.00, steps = 960\n",
      "01:50:10 [DEBUG] train episode 117: reward = -21.00, steps = 910\n",
      "01:52:47 [DEBUG] train episode 118: reward = -20.00, steps = 926\n",
      "01:55:17 [DEBUG] train episode 119: reward = -21.00, steps = 879\n",
      "01:58:20 [DEBUG] train episode 120: reward = -18.00, steps = 1082\n",
      "02:00:42 [DEBUG] train episode 121: reward = -20.00, steps = 841\n",
      "02:03:26 [DEBUG] train episode 122: reward = -20.00, steps = 963\n",
      "02:05:53 [DEBUG] train episode 123: reward = -20.00, steps = 864\n",
      "02:08:31 [DEBUG] train episode 124: reward = -19.00, steps = 928\n",
      "02:10:43 [DEBUG] train episode 125: reward = -21.00, steps = 777\n",
      "02:13:21 [DEBUG] train episode 126: reward = -21.00, steps = 922\n",
      "02:15:48 [DEBUG] train episode 127: reward = -20.00, steps = 869\n",
      "02:19:04 [DEBUG] train episode 128: reward = -18.00, steps = 1156\n",
      "02:21:51 [DEBUG] train episode 129: reward = -20.00, steps = 977\n",
      "02:24:13 [DEBUG] train episode 130: reward = -20.00, steps = 837\n",
      "02:27:12 [DEBUG] train episode 131: reward = -20.00, steps = 1052\n",
      "02:29:44 [DEBUG] train episode 132: reward = -20.00, steps = 894\n",
      "02:32:46 [DEBUG] train episode 133: reward = -19.00, steps = 1066\n",
      "02:35:16 [DEBUG] train episode 134: reward = -20.00, steps = 880\n",
      "02:37:46 [DEBUG] train episode 135: reward = -21.00, steps = 879\n",
      "02:40:15 [DEBUG] train episode 136: reward = -20.00, steps = 872\n",
      "02:42:38 [DEBUG] train episode 137: reward = -21.00, steps = 842\n",
      "02:45:30 [DEBUG] train episode 138: reward = -21.00, steps = 1016\n",
      "02:48:28 [DEBUG] train episode 139: reward = -20.00, steps = 1047\n",
      "02:51:42 [DEBUG] train episode 140: reward = -19.00, steps = 1145\n",
      "02:55:21 [DEBUG] train episode 141: reward = -17.00, steps = 1288\n",
      "02:58:09 [DEBUG] train episode 142: reward = -20.00, steps = 976\n",
      "03:01:57 [DEBUG] train episode 143: reward = -18.00, steps = 1280\n",
      "03:04:55 [DEBUG] train episode 144: reward = -21.00, steps = 1030\n",
      "03:08:15 [DEBUG] train episode 145: reward = -18.00, steps = 1154\n",
      "03:11:54 [DEBUG] train episode 146: reward = -20.00, steps = 1286\n",
      "03:16:20 [DEBUG] train episode 147: reward = -18.00, steps = 1566\n",
      "03:20:42 [DEBUG] train episode 148: reward = -18.00, steps = 1542\n",
      "03:24:44 [DEBUG] train episode 149: reward = -18.00, steps = 1425\n",
      "03:28:56 [DEBUG] train episode 150: reward = -18.00, steps = 1491\n",
      "03:33:11 [DEBUG] train episode 151: reward = -20.00, steps = 1498\n",
      "03:37:51 [DEBUG] train episode 152: reward = -14.00, steps = 1637\n",
      "03:42:53 [DEBUG] train episode 153: reward = -15.00, steps = 1776\n",
      "03:47:11 [DEBUG] train episode 154: reward = -16.00, steps = 1514\n",
      "03:51:04 [DEBUG] train episode 155: reward = -19.00, steps = 1375\n",
      "03:55:32 [DEBUG] train episode 156: reward = -15.00, steps = 1574\n",
      "04:00:45 [DEBUG] train episode 157: reward = -15.00, steps = 1834\n",
      "04:05:13 [DEBUG] train episode 158: reward = -17.00, steps = 1571\n",
      "04:09:36 [DEBUG] train episode 159: reward = -19.00, steps = 1540\n",
      "04:14:57 [DEBUG] train episode 160: reward = -16.00, steps = 1879\n",
      "04:21:00 [DEBUG] train episode 161: reward = -10.00, steps = 2106\n",
      "04:25:35 [DEBUG] train episode 162: reward = -15.00, steps = 1603\n",
      "04:31:47 [DEBUG] train episode 163: reward = -12.00, steps = 2167\n",
      "04:38:45 [DEBUG] train episode 164: reward = -11.00, steps = 2456\n",
      "04:44:03 [DEBUG] train episode 165: reward = -13.00, steps = 1865\n",
      "04:49:12 [DEBUG] train episode 166: reward = -16.00, steps = 1805\n",
      "04:55:34 [DEBUG] train episode 167: reward = -13.00, steps = 2236\n",
      "05:01:06 [DEBUG] train episode 168: reward = -12.00, steps = 1935\n",
      "05:06:59 [DEBUG] train episode 169: reward = -9.00, steps = 2055\n",
      "05:13:18 [DEBUG] train episode 170: reward = -12.00, steps = 2208\n",
      "05:20:44 [DEBUG] train episode 171: reward = -5.00, steps = 2551\n",
      "05:27:59 [DEBUG] train episode 172: reward = -2.00, steps = 2969\n",
      "05:35:16 [DEBUG] train episode 173: reward = -3.00, steps = 3039\n",
      "05:40:48 [DEBUG] train episode 174: reward = -7.00, steps = 2311\n",
      "05:47:56 [DEBUG] train episode 175: reward = -4.00, steps = 2988\n",
      "05:55:38 [DEBUG] train episode 176: reward = 5.00, steps = 3198\n",
      "06:04:04 [DEBUG] train episode 177: reward = -1.00, steps = 3519\n",
      "06:11:55 [DEBUG] train episode 178: reward = 3.00, steps = 3294\n",
      "06:19:31 [DEBUG] train episode 179: reward = -6.00, steps = 3197\n",
      "06:27:16 [DEBUG] train episode 180: reward = 1.00, steps = 3234\n",
      "06:35:17 [DEBUG] train episode 181: reward = 4.00, steps = 3359\n",
      "06:41:55 [DEBUG] train episode 182: reward = -6.00, steps = 2773\n",
      "06:50:12 [DEBUG] train episode 183: reward = -2.00, steps = 3472\n",
      "06:57:08 [DEBUG] train episode 184: reward = 4.00, steps = 2902\n",
      "07:04:13 [DEBUG] train episode 185: reward = 3.00, steps = 2964\n",
      "07:11:53 [DEBUG] train episode 186: reward = -4.00, steps = 3199\n",
      "07:19:21 [DEBUG] train episode 187: reward = 2.00, steps = 3125\n",
      "07:26:49 [DEBUG] train episode 188: reward = 4.00, steps = 3108\n",
      "07:35:17 [DEBUG] train episode 189: reward = -1.00, steps = 3542\n",
      "07:42:10 [DEBUG] train episode 190: reward = 5.00, steps = 2921\n",
      "07:49:08 [DEBUG] train episode 191: reward = -4.00, steps = 3067\n",
      "07:56:39 [DEBUG] train episode 192: reward = -2.00, steps = 3286\n",
      "08:03:39 [DEBUG] train episode 193: reward = 3.00, steps = 3075\n",
      "08:11:01 [DEBUG] train episode 194: reward = -4.00, steps = 3235\n",
      "08:17:34 [DEBUG] train episode 195: reward = 4.00, steps = 2892\n",
      "08:24:35 [DEBUG] train episode 196: reward = -4.00, steps = 3042\n",
      "08:31:19 [DEBUG] train episode 197: reward = 6.00, steps = 2968\n",
      "08:37:32 [DEBUG] train episode 198: reward = 9.00, steps = 2749\n",
      "08:43:31 [DEBUG] train episode 199: reward = 3.00, steps = 2678\n",
      "08:49:35 [DEBUG] train episode 200: reward = 1.00, steps = 3288\n",
      "08:54:54 [DEBUG] train episode 201: reward = 6.00, steps = 2881\n",
      "09:00:16 [DEBUG] train episode 202: reward = -3.00, steps = 2914\n",
      "09:05:58 [DEBUG] train episode 203: reward = -1.00, steps = 3089\n",
      "09:10:46 [DEBUG] train episode 204: reward = -3.00, steps = 2600\n",
      "09:16:45 [DEBUG] train episode 205: reward = 1.00, steps = 3255\n",
      "09:22:47 [DEBUG] train episode 206: reward = 3.00, steps = 3276\n",
      "09:29:10 [DEBUG] train episode 207: reward = 1.00, steps = 3466\n",
      "09:35:06 [DEBUG] train episode 208: reward = 2.00, steps = 3225\n",
      "09:41:24 [DEBUG] train episode 209: reward = -1.00, steps = 3416\n",
      "09:47:00 [DEBUG] train episode 210: reward = 2.00, steps = 3187\n",
      "09:52:06 [DEBUG] train episode 211: reward = 2.00, steps = 2934\n",
      "09:57:26 [DEBUG] train episode 212: reward = 7.00, steps = 3070\n",
      "10:02:24 [DEBUG] train episode 213: reward = 5.00, steps = 2857\n",
      "10:08:12 [DEBUG] train episode 214: reward = 3.00, steps = 3335\n",
      "10:13:00 [DEBUG] train episode 215: reward = 7.00, steps = 2773\n",
      "10:18:11 [DEBUG] train episode 216: reward = 6.00, steps = 2974\n",
      "10:22:17 [DEBUG] train episode 217: reward = -9.00, steps = 2357\n",
      "10:27:54 [DEBUG] train episode 218: reward = -2.00, steps = 3226\n",
      "10:33:04 [DEBUG] train episode 219: reward = 5.00, steps = 2969\n",
      "10:38:11 [DEBUG] train episode 220: reward = 3.00, steps = 2947\n",
      "10:43:17 [DEBUG] train episode 221: reward = 2.00, steps = 2948\n",
      "10:48:17 [DEBUG] train episode 222: reward = -2.00, steps = 2872\n",
      "10:53:16 [DEBUG] train episode 223: reward = 3.00, steps = 2871\n",
      "10:58:36 [DEBUG] train episode 224: reward = 4.00, steps = 3073\n",
      "11:03:49 [DEBUG] train episode 225: reward = 1.00, steps = 2999\n",
      "11:09:39 [DEBUG] train episode 226: reward = -1.00, steps = 3347\n",
      "11:15:26 [DEBUG] train episode 227: reward = 6.00, steps = 3338\n",
      "11:20:15 [DEBUG] train episode 228: reward = 7.00, steps = 2773\n",
      "11:25:38 [DEBUG] train episode 229: reward = -4.00, steps = 3076\n",
      "11:31:10 [DEBUG] train episode 230: reward = 5.00, steps = 3141\n",
      "11:36:51 [DEBUG] train episode 231: reward = 6.00, steps = 3221\n",
      "11:42:11 [DEBUG] train episode 232: reward = 2.00, steps = 3035\n",
      "11:46:39 [DEBUG] train episode 233: reward = 10.00, steps = 2529\n",
      "11:52:15 [DEBUG] train episode 234: reward = -1.00, steps = 3180\n",
      "11:57:01 [DEBUG] train episode 235: reward = 6.00, steps = 2684\n",
      "12:02:29 [DEBUG] train episode 236: reward = 6.00, steps = 3069\n",
      "12:07:39 [DEBUG] train episode 237: reward = 3.00, steps = 2882\n",
      "12:12:58 [DEBUG] train episode 238: reward = 3.00, steps = 2967\n",
      "12:17:40 [DEBUG] train episode 239: reward = 8.00, steps = 2625\n",
      "12:23:52 [DEBUG] train episode 240: reward = -1.00, steps = 3477\n",
      "12:28:02 [DEBUG] train episode 241: reward = 13.00, steps = 2336\n",
      "12:32:50 [DEBUG] train episode 242: reward = -3.00, steps = 2662\n",
      "12:38:04 [DEBUG] train episode 243: reward = 3.00, steps = 2933\n",
      "12:43:42 [DEBUG] train episode 244: reward = -2.00, steps = 3145\n",
      "12:48:24 [DEBUG] train episode 245: reward = -3.00, steps = 2621\n",
      "12:53:40 [DEBUG] train episode 246: reward = -2.00, steps = 2949\n",
      "12:58:31 [DEBUG] train episode 247: reward = -3.00, steps = 2616\n",
      "13:04:16 [DEBUG] train episode 248: reward = -2.00, steps = 3147\n",
      "13:09:44 [DEBUG] train episode 249: reward = -1.00, steps = 2840\n",
      "13:15:28 [DEBUG] train episode 250: reward = -1.00, steps = 3248\n",
      "13:21:02 [DEBUG] train episode 251: reward = -2.00, steps = 3161\n",
      "13:25:26 [DEBUG] train episode 252: reward = 9.00, steps = 2510\n",
      "13:30:54 [DEBUG] train episode 253: reward = -2.00, steps = 3108\n",
      "13:35:38 [DEBUG] train episode 254: reward = -3.00, steps = 2707\n",
      "13:39:49 [DEBUG] train episode 255: reward = 12.00, steps = 2385\n",
      "13:44:56 [DEBUG] train episode 256: reward = -3.00, steps = 2907\n",
      "13:49:54 [DEBUG] train episode 257: reward = -3.00, steps = 2834\n",
      "13:54:17 [DEBUG] train episode 258: reward = -4.00, steps = 2516\n",
      "13:58:31 [DEBUG] train episode 259: reward = 10.00, steps = 2428\n",
      "14:03:37 [DEBUG] train episode 260: reward = 1.00, steps = 2923\n",
      "14:08:36 [DEBUG] train episode 261: reward = -1.00, steps = 2843\n",
      "14:13:58 [DEBUG] train episode 262: reward = 1.00, steps = 3050\n",
      "14:18:14 [DEBUG] train episode 263: reward = 12.00, steps = 2408\n",
      "14:23:58 [DEBUG] train episode 264: reward = -1.00, steps = 3250\n",
      "14:28:25 [DEBUG] train episode 265: reward = 13.00, steps = 2515\n",
      "14:33:35 [DEBUG] train episode 266: reward = -3.00, steps = 2913\n",
      "14:38:08 [DEBUG] train episode 267: reward = 7.00, steps = 2588\n",
      "14:42:59 [DEBUG] train episode 268: reward = -2.00, steps = 2731\n",
      "14:47:51 [DEBUG] train episode 269: reward = 8.00, steps = 2760\n",
      "14:52:47 [DEBUG] train episode 270: reward = 9.00, steps = 2776\n",
      "14:57:11 [DEBUG] train episode 271: reward = 10.00, steps = 2482\n",
      "15:01:51 [DEBUG] train episode 272: reward = 11.00, steps = 2630\n",
      "15:06:07 [DEBUG] train episode 273: reward = 13.00, steps = 2401\n",
      "15:09:28 [DEBUG] train episode 274: reward = 20.00, steps = 1895\n",
      "15:13:20 [DEBUG] train episode 275: reward = 15.00, steps = 2175\n",
      "15:17:34 [DEBUG] train episode 276: reward = 13.00, steps = 2385\n",
      "15:23:16 [DEBUG] train episode 277: reward = 2.00, steps = 3196\n",
      "15:27:50 [DEBUG] train episode 278: reward = 9.00, steps = 2578\n",
      "15:31:48 [DEBUG] train episode 279: reward = 16.00, steps = 2231\n",
      "15:37:32 [DEBUG] train episode 280: reward = 3.00, steps = 3229\n",
      "15:42:19 [DEBUG] train episode 281: reward = 7.00, steps = 2699\n",
      "15:46:41 [DEBUG] train episode 282: reward = 13.00, steps = 2472\n",
      "15:51:07 [DEBUG] train episode 283: reward = 10.00, steps = 2508\n",
      "15:55:21 [DEBUG] train episode 284: reward = 13.00, steps = 2377\n",
      "16:00:24 [DEBUG] train episode 285: reward = 2.00, steps = 2854\n",
      "16:04:58 [DEBUG] train episode 286: reward = 10.00, steps = 2572\n",
      "16:08:22 [DEBUG] train episode 287: reward = 18.00, steps = 1916\n",
      "16:12:48 [DEBUG] train episode 288: reward = 15.00, steps = 2499\n",
      "16:16:56 [DEBUG] train episode 289: reward = 16.00, steps = 2341\n",
      "16:22:02 [DEBUG] train episode 290: reward = 5.00, steps = 2898\n",
      "16:26:20 [DEBUG] train episode 291: reward = 13.00, steps = 2428\n",
      "16:31:26 [DEBUG] train episode 292: reward = 7.00, steps = 2897\n",
      "16:35:09 [DEBUG] train episode 293: reward = 15.00, steps = 2125\n",
      "16:40:35 [DEBUG] train episode 294: reward = 3.00, steps = 3075\n",
      "16:44:02 [DEBUG] train episode 295: reward = 18.00, steps = 1967\n",
      "16:47:44 [DEBUG] train episode 296: reward = 16.00, steps = 2104\n",
      "16:52:43 [DEBUG] train episode 297: reward = 8.00, steps = 2826\n",
      "16:57:24 [DEBUG] train episode 298: reward = 11.00, steps = 2648\n",
      "17:02:50 [DEBUG] train episode 299: reward = 5.00, steps = 3076\n",
      "17:06:57 [DEBUG] train episode 300: reward = 12.00, steps = 2330\n",
      "17:11:09 [DEBUG] train episode 301: reward = 11.00, steps = 2371\n",
      "17:15:14 [DEBUG] train episode 302: reward = 13.00, steps = 2305\n",
      "17:18:52 [DEBUG] train episode 303: reward = 16.00, steps = 2074\n",
      "17:23:24 [DEBUG] train episode 304: reward = 10.00, steps = 2562\n",
      "17:27:43 [DEBUG] train episode 305: reward = 12.00, steps = 2433\n",
      "17:32:14 [DEBUG] train episode 306: reward = 11.00, steps = 2549\n",
      "17:35:43 [DEBUG] train episode 307: reward = 16.00, steps = 1960\n",
      "17:40:22 [DEBUG] train episode 308: reward = 9.00, steps = 2611\n",
      "17:43:57 [DEBUG] train episode 309: reward = 17.00, steps = 2011\n",
      "17:48:16 [DEBUG] train episode 310: reward = 11.00, steps = 2432\n",
      "17:52:09 [DEBUG] train episode 311: reward = 15.00, steps = 2176\n",
      "17:56:19 [DEBUG] train episode 312: reward = 14.00, steps = 2336\n",
      "18:00:17 [DEBUG] train episode 313: reward = 15.00, steps = 2235\n",
      "18:03:44 [DEBUG] train episode 314: reward = 18.00, steps = 1957\n",
      "18:08:16 [DEBUG] train episode 315: reward = 13.00, steps = 2566\n",
      "18:13:14 [DEBUG] train episode 316: reward = 8.00, steps = 2809\n",
      "18:17:21 [DEBUG] train episode 317: reward = 13.00, steps = 2340\n",
      "18:20:56 [DEBUG] train episode 318: reward = 18.00, steps = 1942\n",
      "18:24:31 [DEBUG] train episode 319: reward = 16.00, steps = 1978\n",
      "18:28:11 [DEBUG] train episode 320: reward = 16.00, steps = 2062\n",
      "18:32:50 [DEBUG] train episode 321: reward = 8.00, steps = 2604\n",
      "18:37:28 [DEBUG] train episode 322: reward = 10.00, steps = 2592\n",
      "18:40:58 [DEBUG] train episode 323: reward = 18.00, steps = 1943\n",
      "18:45:18 [DEBUG] train episode 324: reward = 12.00, steps = 2420\n",
      "18:48:35 [DEBUG] train episode 325: reward = 19.00, steps = 1825\n",
      "18:51:46 [DEBUG] train episode 326: reward = 19.00, steps = 1761\n",
      "18:55:08 [DEBUG] train episode 327: reward = 16.00, steps = 1870\n",
      "18:55:09 [INFO] ==== test ====\n",
      "18:55:28 [DEBUG] test episode 0: reward = 10.00, steps = 2217\n",
      "18:55:50 [DEBUG] test episode 1: reward = 10.00, steps = 2210\n",
      "18:56:14 [DEBUG] test episode 2: reward = 10.00, steps = 2214\n",
      "18:56:34 [DEBUG] test episode 3: reward = 18.00, steps = 1914\n",
      "18:56:52 [DEBUG] test episode 4: reward = 19.00, steps = 1785\n",
      "18:57:11 [DEBUG] test episode 5: reward = 19.00, steps = 1797\n",
      "18:57:29 [DEBUG] test episode 6: reward = 19.00, steps = 1781\n",
      "18:57:47 [DEBUG] test episode 7: reward = 19.00, steps = 1788\n",
      "18:58:05 [DEBUG] test episode 8: reward = 19.00, steps = 1784\n",
      "18:58:27 [DEBUG] test episode 9: reward = 10.00, steps = 2216\n",
      "18:58:45 [DEBUG] test episode 10: reward = 19.00, steps = 1798\n",
      "18:59:02 [DEBUG] test episode 11: reward = 18.00, steps = 1916\n",
      "18:59:19 [DEBUG] test episode 12: reward = 18.00, steps = 1913\n",
      "18:59:35 [DEBUG] test episode 13: reward = 19.00, steps = 1798\n",
      "18:59:51 [DEBUG] test episode 14: reward = 19.00, steps = 1797\n",
      "19:00:07 [DEBUG] test episode 15: reward = 19.00, steps = 1787\n",
      "19:00:26 [DEBUG] test episode 16: reward = 10.00, steps = 2215\n",
      "19:00:46 [DEBUG] test episode 17: reward = 10.00, steps = 2215\n",
      "19:01:02 [DEBUG] test episode 18: reward = 19.00, steps = 1799\n",
      "19:01:19 [DEBUG] test episode 19: reward = 18.00, steps = 1910\n",
      "19:01:35 [DEBUG] test episode 20: reward = 19.00, steps = 1793\n",
      "19:01:52 [DEBUG] test episode 21: reward = 18.00, steps = 1911\n",
      "19:02:08 [DEBUG] test episode 22: reward = 19.00, steps = 1782\n",
      "19:02:23 [DEBUG] test episode 23: reward = 19.00, steps = 1796\n",
      "19:02:43 [DEBUG] test episode 24: reward = 10.00, steps = 2216\n",
      "19:02:59 [DEBUG] test episode 25: reward = 19.00, steps = 1785\n",
      "19:03:15 [DEBUG] test episode 26: reward = 19.00, steps = 1781\n",
      "19:03:32 [DEBUG] test episode 27: reward = 18.00, steps = 1916\n",
      "19:03:47 [DEBUG] test episode 28: reward = 19.00, steps = 1784\n",
      "19:04:03 [DEBUG] test episode 29: reward = 19.00, steps = 1787\n",
      "19:04:19 [DEBUG] test episode 30: reward = 19.00, steps = 1786\n",
      "19:04:35 [DEBUG] test episode 31: reward = 19.00, steps = 1795\n",
      "19:04:51 [DEBUG] test episode 32: reward = 19.00, steps = 1798\n",
      "19:05:08 [DEBUG] test episode 33: reward = 18.00, steps = 1912\n",
      "19:05:24 [DEBUG] test episode 34: reward = 19.00, steps = 1785\n",
      "19:05:40 [DEBUG] test episode 35: reward = 19.00, steps = 1795\n",
      "19:05:56 [DEBUG] test episode 36: reward = 19.00, steps = 1796\n",
      "19:06:13 [DEBUG] test episode 37: reward = 18.00, steps = 1911\n",
      "19:06:29 [DEBUG] test episode 38: reward = 19.00, steps = 1793\n",
      "19:06:45 [DEBUG] test episode 39: reward = 19.00, steps = 1798\n",
      "19:07:02 [DEBUG] test episode 40: reward = 18.00, steps = 1916\n",
      "19:07:18 [DEBUG] test episode 41: reward = 19.00, steps = 1788\n",
      "19:07:37 [DEBUG] test episode 42: reward = 10.00, steps = 2216\n",
      "19:07:55 [DEBUG] test episode 43: reward = 18.00, steps = 1916\n",
      "19:08:10 [DEBUG] test episode 44: reward = 19.00, steps = 1783\n",
      "19:08:30 [DEBUG] test episode 45: reward = 10.00, steps = 2215\n",
      "19:08:50 [DEBUG] test episode 46: reward = 10.00, steps = 2216\n",
      "19:09:06 [DEBUG] test episode 47: reward = 19.00, steps = 1799\n",
      "19:09:26 [DEBUG] test episode 48: reward = 10.00, steps = 2213\n",
      "19:09:41 [DEBUG] test episode 49: reward = 19.00, steps = 1785\n",
      "19:09:57 [DEBUG] test episode 50: reward = 19.00, steps = 1796\n",
      "19:10:13 [DEBUG] test episode 51: reward = 19.00, steps = 1786\n",
      "19:10:29 [DEBUG] test episode 52: reward = 19.00, steps = 1786\n",
      "19:10:49 [DEBUG] test episode 53: reward = 10.00, steps = 2213\n",
      "19:11:06 [DEBUG] test episode 54: reward = 18.00, steps = 1910\n",
      "19:11:22 [DEBUG] test episode 55: reward = 19.00, steps = 1787\n",
      "19:11:40 [DEBUG] test episode 56: reward = 18.00, steps = 1915\n",
      "19:11:55 [DEBUG] test episode 57: reward = 19.00, steps = 1781\n",
      "19:12:15 [DEBUG] test episode 58: reward = 10.00, steps = 2215\n",
      "19:12:31 [DEBUG] test episode 59: reward = 19.00, steps = 1781\n",
      "19:12:48 [DEBUG] test episode 60: reward = 18.00, steps = 1916\n",
      "19:13:08 [DEBUG] test episode 61: reward = 10.00, steps = 2213\n",
      "19:13:24 [DEBUG] test episode 62: reward = 19.00, steps = 1796\n",
      "19:13:43 [DEBUG] test episode 63: reward = 10.00, steps = 2212\n",
      "19:13:59 [DEBUG] test episode 64: reward = 19.00, steps = 1785\n",
      "19:14:15 [DEBUG] test episode 65: reward = 19.00, steps = 1784\n",
      "19:14:35 [DEBUG] test episode 66: reward = 10.00, steps = 2217\n",
      "19:14:51 [DEBUG] test episode 67: reward = 19.00, steps = 1798\n",
      "19:15:07 [DEBUG] test episode 68: reward = 19.00, steps = 1798\n",
      "19:15:23 [DEBUG] test episode 69: reward = 19.00, steps = 1784\n",
      "19:15:42 [DEBUG] test episode 70: reward = 10.00, steps = 2216\n",
      "19:15:58 [DEBUG] test episode 71: reward = 19.00, steps = 1799\n",
      "19:16:16 [DEBUG] test episode 72: reward = 18.00, steps = 1910\n",
      "19:16:32 [DEBUG] test episode 73: reward = 19.00, steps = 1796\n",
      "19:16:49 [DEBUG] test episode 74: reward = 18.00, steps = 1910\n",
      "19:17:06 [DEBUG] test episode 75: reward = 18.00, steps = 1910\n",
      "19:17:25 [DEBUG] test episode 76: reward = 10.00, steps = 2213\n",
      "19:17:45 [DEBUG] test episode 77: reward = 10.00, steps = 2213\n",
      "19:18:05 [DEBUG] test episode 78: reward = 10.00, steps = 2217\n",
      "19:18:22 [DEBUG] test episode 79: reward = 18.00, steps = 1914\n",
      "19:18:41 [DEBUG] test episode 80: reward = 10.00, steps = 2213\n",
      "19:19:01 [DEBUG] test episode 81: reward = 10.00, steps = 2210\n",
      "19:19:20 [DEBUG] test episode 82: reward = 10.00, steps = 2217\n",
      "19:19:37 [DEBUG] test episode 83: reward = 18.00, steps = 1915\n",
      "19:19:54 [DEBUG] test episode 84: reward = 18.00, steps = 1915\n",
      "19:20:10 [DEBUG] test episode 85: reward = 19.00, steps = 1788\n",
      "19:20:30 [DEBUG] test episode 86: reward = 10.00, steps = 2211\n",
      "19:20:46 [DEBUG] test episode 87: reward = 19.00, steps = 1798\n",
      "19:21:02 [DEBUG] test episode 88: reward = 19.00, steps = 1788\n",
      "19:21:19 [DEBUG] test episode 89: reward = 18.00, steps = 1911\n",
      "19:21:36 [DEBUG] test episode 90: reward = 18.00, steps = 1914\n",
      "19:21:52 [DEBUG] test episode 91: reward = 19.00, steps = 1794\n",
      "19:22:08 [DEBUG] test episode 92: reward = 19.00, steps = 1794\n",
      "19:22:23 [DEBUG] test episode 93: reward = 19.00, steps = 1788\n",
      "19:22:40 [DEBUG] test episode 94: reward = 19.00, steps = 1793\n",
      "19:22:55 [DEBUG] test episode 95: reward = 19.00, steps = 1784\n",
      "19:23:11 [DEBUG] test episode 96: reward = 19.00, steps = 1799\n",
      "19:23:28 [DEBUG] test episode 97: reward = 18.00, steps = 1912\n",
      "19:23:46 [DEBUG] test episode 98: reward = 18.00, steps = 1912\n",
      "19:24:05 [DEBUG] test episode 99: reward = 10.00, steps = 2213\n",
      "19:24:05 [INFO] average episode reward = 16.52 ± 3.79\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAD4CAYAAAAJmJb0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABGWUlEQVR4nO2dd3gc13nu3zOzFVj0QgLsFCmKRZ1WsWRbVrMk25F7je0kdmRfK1d2HCeR4+Q6ub4uN9d2El+XXLlFjossx1Kk2LKtYsnqhaJIkSDF3gCQaASxC2DbzJz7x8w5e2Z2ZgFwAWJ38f2ehw92px7sEu/55j3f+Q7jnIMgCIKoTbT5bgBBEAQxd5DIEwRB1DAk8gRBEDUMiTxBEEQNQyJPEARRw4TmuwEq7e3tfOXKlfPdDIIgiKrixRdfHOacd/jtqyiRX7lyJbZs2TLfzSAIgqgqGGNHgvaRXUMQBFHDkMgTBEHUMCTyBEEQNQyJPEEQRA1DIk8QBFHDlC3yjLFljLFHGWO7GWM9jLFPONtbGWMPMcb2OT9bym8uQRAEMRNmI5I3APwF53w9gMsA3MoY2wDgdgCPcM7XAnjEeU8QBEGcQcoWec75cc75Vud1CsBuAEsA3AzgTuewOwG8pdx7EQRBqHDO8fMtx5DJm/PdlIplVj15xthKABcCeA7AIs75ccDuCAB0BpxzC2NsC2Nsy9DQ0Gw2hyCIGmfvwDj+8j9exqOvDM53UyqWWRN5xlgCwC8AfJJznpzueZzzOzjnmznnmzs6fGflEgRB+DKZMwAAEzmK5IOYFZFnjIVhC/yPOef3OJsHGGNdzv4uANTVEgQxq+QMCwCQJrsmkNnIrmEAvgdgN+f8a8qu+wF8yHn9IQD3lXsvgiAIlZxpi3yGIvlAZiOSvwLABwBczRjb5vy7CcCXAVzHGNsH4DrnPUEQxKxR7ZH8r3ccxy0/3IL/2t4/Z/couwol5/xJACxg9zXlXp8gCCKIahf5n75wDI/vHcKpdB5vPr97Tu5BM14JgqhahF2TrlK7JmfY7U6m83N2DxJ5giCqlqwTyVdrnrx4EiGRJwiC8CFX7SLvPIkkM8ac3YNEniCIqmWmnnzOsPC2bz2FZw6M+O7/yXNH8amfbSu7Xbf8cAvu29YHABgez+KNX38CR0cm5f7f7DyBz9yzQ7Z/PGvAcAR/tiGRJwiiaslKkZ+eQA4kM9h69BQ+/fPtvvv/5t4duOelvrLaxDnHg7sG8Im7tgEAfr3zBHr6k/j27w/IY57aP4z7tvVJkQeA1BxF8yTyBEFULdKumebAq6bZiYBTRf7l2D/e2bcNUTuJcTxbEPGcYSFrWMgZFnSnTWNz5MuTyBMEUbXkTFtQp2vX5I3pZeMMpbKn3SavWEdCtsymMoXtedOCaXFM5Ex0JKIAgGSGRJ4gCMLFTD35vFn6+FjYlsTBMkTemykjngpUO0YMuKYyeXQ02CJPkTxBEISH3DQjc3n8FIObbfW24A6lMtNuw3ceP4j3f/dZ+V6INXOmiKalyLsjeQCwOKTIJ9PkyRMEQbiQtWumHcnzkvvbEhEAM7Nr9gyk0NNfKLwrIvmIrjltczJolEhebUe7c0+K5AmCIDzMdDKUms3iR1M8DGBmdk3etFz3HysSeR+7RmmHjOTJkycIgnCjevKcl47SgYJNEoS4xGByZiKfNSx5fzGxSQy4CisplTVke1XbqCkeRlhnFMkTBEF4EaJp8YJw5gwLI+P+Iq2K6/GxdNF+07KFejDAkz8xlpFibpgWhlJZ5AwOzgsWjBBr3Sdd84XDJwG4O5uIrqExFp6z0gYk8gRBVC2qaGdy9us7nz6MG/7lCd/j84pNcvmXfoeHdw249puOgI9M5IrO3X7sFC770iP4jxd7AQC/2NqL13/lMUw4+e9ZT7GxnE8mz/u/+xwGkxm3yId0NMXDFMkTBEF4Ub1tIaYDyQyGAyJ578BrvyeaF5F8
1mcG7c7+MQDAi0dGAQDHxzIYzxpSnMUAqxB5YdNkciaWNMfxwctXALAj/bxRaEckpOFLbzsXf3b1mil/39OBRJ4giKrFT+RzpgXOAcsq9ui9nryYiCQQIu/n3Qvhj4V11/0mcp5I3hlAzRoWLIsjnTdRH9VxxZp22T53JK/h0tVtOGdx47R+55lCIk8QRNWi2jUichbCb/iIvDe7xnuM5dg1fvn0YlvUGVAVpRREaqTI9FFtl4xhIp03EQ/rMtsmb3J5LFDIwpkryl4ZiiAIYr7IGRaiIQ1ZwypE8o6Amn4i7xFvccxYOo8DQ+MlI3mRCilEXtxP1KTZeyIFBvekpnTORDpnIhbWEZYi747kxfXmChJ5giCqlpxhobU+Iv1xAMiaIpK3AOiu473iLd7/5Lmj+KeH9mJFW528rhcRfQuxFpUvxfb/9uOtAIDmujBCGoNhcWQMCxnDQrOTJgnYg79nUuTJriEIomrJGhaWNMcBFGapyrRKn5R4Ia4P/vlrARQi+fFsHjnTkpG+38xYYQfJ+jcBpRROTeZx4fJmeUwmZ9s1YUfMs6blun6ERJ4gCMKfnGlhSYu/yBs+Ki/Etd4p/5uX9gx3nevnyYusmenMst28slUek86biEcUT96wXNcnkScIgggg51ghiWhITmAq6ck7++JOhoypTKACCgKeN62iGbSFVMnS5Y0ZAy5c1iyPSedtT16IuV92zVxCIk8QRNWSNUxEQho6GqKy3kxOevL+A69hnSHk+OOGZ6A16wg358WdhJoaCQTbNWd1JGShM5ddoxfKHKj9x1xn15DIEwRRteQMS4r8kFNvRuSr+0XyecNCRNcQ1mzpEyLvZ9N4LZuxado1b71wiSuX3rZrNDnwOunpHCiSJwiC8MEwLVgciOg6OhuiGBp3e/K+Im9aCIc0WVfGMAv2jP2zcI46KxUopEaKTsTPrvnrG87Bra9fI+2gVMaAYXFXnryYPCUgkScIgvBBRNqRkIbOhhgGk25P3t+u4QjrGkKa166ZOqfeW77AT+Tro7a4xyP2z1GnBo6aJz+RdYt8VHenec42JPIEQVQlQsyFXTORMzGhlPMNiuQjugZNY9AYYJjFM1xF3ro6OJpTJluJSN7PrqmL2Fk7sZAj8pMFkRcR+0T2zNo1NBmKIIiy+dZj+7GhqxFXreuc1es+e3AETx8YwaeuO7tonyryzcpiHzllMtQ9W3uRNSwk03mc1ZGw7RrHGw/pGo6enMRf/nw7JhULJR7RkfVMWFIX9Mjm7Zo0GZ8iZglvJD9pnxcvEcmTyBMEUfH84KnDuOrsjlkX+d/2nMBdzx/zFXkhvI2xkPTAJ7KGHBi1LOBTd293nXPjpsVSbEMaw5P7h3FyIidXZwJEFJ53zXo9qZQezhimq/aMiojkxdOAsGviEd134FXXmBwfmCvIriEIomwsi8ta7GfqumL1po6GqKsuTOnJUJaMnEMak2mQ6vqrsXAhn917r7b6CLJ5KzBHXnjyjDHEwzpOOnZNPKyDMYawzlwDr3OdPgmQyBMEMQsYFvct7TuX1xV58Z0NsUK2jMWlOPsXKOOFSF7XpFiroi3SH9XB2KFxe1B3aWudqxiaFzGTFrCjd3XgFbDr3kwqnvxcWzUAiTxBELOAHXHPwXV5cCQvyhh0Nkbl5KZs3pITjfyya0SePACZYeOlIPLFkfzSljiyhhk4Eao+ooh8WJcDr8Kjj4Q0WUitPqLPeXEygDx5giBmgdOJ5D9zzw6s72rABy9fWbRvy+GT+NKvX8Gyljg4BzjnYMwtyoOpDKIhDQ3REEJacQ56UHZNNDyVyBdqzADAzd94ErtPpBAP62irj2AgmcW1X/u977lqJB8LazjhpHXG1UjeaWNTPIzQGbBrSOQJgigbk3NfUS3FE/uGMJE1fEV+R98YXjwyijonAjYtLqN1wWAqi87GKBgrlClQI+wgkU/EbNkLElghyFmnfs32XnvZvxVtdTLKD0K0F7Cjd9EG
cc2IrmHcsWtuvXoNuppiJa83G5DIEwRRNuZpDLyWGlSVa62KnHfOi8RqKJVFZ4MtkiIqLxXJx8L24iJT2TXCWrHrvheu0dkQRayEvaJrzGW/iFx5cW8ACOsMo5N2G89f2oxNS5oCrzdbkCdPEERZcCeKn6ldY1gcZoCRL8RVTYf0MpjKotNJfRR2jRrJez35+khIljUAUPRkIBDinDe5K8MmFtYRDYjkGbM9dtVSiitRfSyi2jV2G8/EoCtAIk8QRJkILZ1xJF9iUNW03CWA/Y4bTGZkfrsQbHU2qXcVqLqojrzJZSSva/7yJ4Q8kzdlVUrAfnLwDpSKp4FENOTy4wG4rB1p1yjnh8+AHw+QXUMQRJkIW2Smnnyp6L+wiId/RcnJnIFkxlAieWeiUd5QjnFnwNRHQhhL5+WkpHBAJC8E+S9+vh1LnQVJAODsRQ1FIt8UD2NkIoeORLTIrxfXCWlMCroq7EH3n20okicIoiyEAFszjOQNi/umOarXFHaJtzN45UQKgC28QMGuUXPQJz3VHgGnCqWM5IM8+YIs9o6mAQDvu3Q5vvi2c2WUf8GyZtx1y2VyTdi/fdN6fP29F7qv4xwbV8Rfnfx0JiZCAbMk8oyx7zPGBhljO5VtrYyxhxhj+5yfLbNxL4IgKgthpcw0krcsHtgx5C2xiIe/XdPTnwQAbHQGLqVdowi7txCYYXFZfx6ArCnvRR0wFVy2ug2JaEhG8u2JKC5b3Savtby1Dms6E65zhCcfU7z5cKjQsZwpu2a27vJvAG7wbLsdwCOc87UAHnHeEwRRY4jB0xnbNSXSLk2zdCTf0zeGlrowupvc2TWlInnTmQ0bmTKSLxZ5cY7odJqcgmiRUGFA1UvMJ5J32TXVNPDKOX8cwEnP5psB3Om8vhPAW2bjXgRBnHksi2PfQMp33+lH8u5z9g+m5Hth45SK5Dd2N8lsFpHzPqkMlHoj+bxpp0QWyhr4i7xfBo2I4GVRtLg9nBnx8doFU9k1teDJL+KcHwcA56dveTrG2C2MsS2MsS1DQ0Nz2ByCIE6X3+8bwvX//Dj6TqWL9olCYDMta2BYlhT1wWQG1//T43hk94DrmrJssLpik2lhz4kUNnY3ym26jOTVgVd3JJ8z7PuFp8iT9ys1IGyZcxbb93zN2nbXsX7pkCI3XvwECtE7Y8F20Wwz7wOvnPM7OOebOeebOzo65rs5BEH4MDqRA+fA2GS+aJ8ceJ1BJM85h8ULEXoqa8DihUU2DI8FpHr3+wbGkTMt6ccD8C3jO+G8/vb7L8JbLuiWRcWELx4049VP/IWYX7m2Hc9/9hpcfc4iAAVx943khSfvE8m31UehzXGJYcFcivwAY6wLAJyfg3N4L4Ig5pB8icqOp5NCKQ61POeK1Elv1o167Z39dpkB30heid5FVN+WiKIhFpYrOU0149XPq1cjdTHLVr2WX6aM9OTVgVenM+pU6tfPNXMp8vcD+JDz+kMA7pvDexEEMYfkpPgWTz09nRRKcR0h5iJyz0t7xn0f9dq7+pOoi+hY1VYvtwnrQ43kRbXHaEhDSGeyA5H15AMi+alE3m+7n7/u58mLiL+j2kSeMfZTAM8AWMcY62WMfRjAlwFcxxjbB+A65z1BEKfBswdH0Ds6OW/3L7U49kwi+UPDE9h6dFSWKfB2EEZgJF94vbNvDBu6Gl12h6YxMOYWefE6GtZcUftUnrzOfEQ+oEOIhDQw5t8x+A68Op3CmYzkZ2XGK+f8vQG7rpmN6xPEQuc9dzyLWFjDK5+/cV7un/cZABVIkZ9GJP8vD+9FT38S9956BQBF3D2Tn7z3UX3/XceTeOfFS4uuHdY0l10jcuajId0VtU8p8jOI5Nd3NeK8pc1FZZAB/zx50YF0NlaZyBMEMXcI39pv4egzRb7EknpC3Kcz8JrJW8gYpsyDNzxPAUGRvOgMDo1MYDJnugZdBbrGoFYyEMXKYkWRvBh4LV/k33HxUrzDp8Ox71scyQsL
qSNRZXYNQRBzx7jP9PwzjYzkfYRcZsJMI5I3LAuGyYs6hkJ+vH9nIvbLma7KoKtAiLbG7H8FT16XZQ8AdeDVX/78sl6ieuk68n74pVCecrKT2qvNriEIYu7wS1s808iBVx+7xpKCPfV1RL0ar8XjrVXj9ffFcT19YwjrDGs7G4quLaL1SEiDZSmevDPwKhBRedCMVz8b53TKAvt58qfSdopoS11kxtc7XUjkCaLCEbMs5xNZ8tej5A/vGsC2Y6cA+Fs5XgyTwzCtoo7Ba9d4ywRbSiS/bnGDr+iGlHTGHCzk8vY50ZD/wGvQjFPfgdfTEXmfPHkRyTfXhWd8vdOF7BqCqHDG0vMv8kJ0855I/iM/3IJvPLofgDsDJgjDslzVJwuzZd3iXhTJO++Pj6WxvLXO99pCyKPhgj0T0hhCuuaK2gtVKIPtmtuuWYtV7fXOcSww6i9FeyKK6zcswqWr2uS2L77tXLz6rDbfJ5G5gkSeICqcZNr2ls9UaVo/Sk2GEkwnT96O5LnixcP56e5EvJ2JqWThBFVvFJZMRBH1qM+M1EiodD15XWP41HVn4y/fsE5e73QI6xru+OBmnLu0MEh80fIW/ORPLztjq0IBJPIEUfEknUjer6bKmSInI/ngcH06efLCj/fmx4vLBnUmwtYxTF7CS7c/H9WeEcXG/CP50tk1perSVBPV3XqCWAAITz4ans9IfuoJT9NJoTQsC3nHsgEKM1tFJG8EdCaqnRNU2EsdeC2O5P08eftnnae0sPDko04Z4fnsXGeD6m49QSwAhCcflPIXxJd+vRv/tb0fedPCR/99C17uPTWt8zjnuO2nL+Hp/cNym8iTz5cQ8mmlUJocnBe8eLk+rIzk/TuTnz53FN98dD8Mi0+Z3+4n8qr/7s2uSThrs3q3i06VInmCIOYUYdeUskr8+MWLvfjVy8fRN5rGb3sG8Ec/eGFa5/WPZXD/9n7cdtdLcptMbZwFuwZQs3XcA7BB+fi/6TkhO6wgTz6sZNcIoY5HQs4+JYXSM+P1jed14ZPXrsUiZxaqOFesEEUiTxDEnCIi+awxM5HP5C0MpjLyvJMTuWmdt7PPrvK4uqOwnJ2f+HrtmekMvApRF22Sk6I82TV+6Zg5064HP1U5gojiyddHgj15cczq9np88tqzpQ1UFMnP44D3bFDdrSeIBUAyY2fXZA1ziiMLcM6RzpsYTGVd657yaQixmFWqrlnqV6DM2+lMJ5IXIi5XfAooZ+A36Upk5gRVjxTRumrX1DtWjGp1FcoauL15cY7G3FYPefIEQcwpY9KuCV4T1Ys4diiVxYSyWtLxscyU5/Y4kXxYiX7VEsCWxWUnomLxqTuRQiRvut6LSL5Utcu8aQ/alkp9BOzIW4h6fdSO5F2ToULuSN7rxRf8fLJrCII4AySVyVDHx9JYefuv8JudJ0qeIwQ4a1g4oQj7LidKL8UrJ+y1XA2L48DQOFbe/itsPXoKAHD/9n6s/psH8MavP1kk8kBhIDUIMbCqPgVYrslR7qwblaxhgfPgAWjpyYc0WX+mzvHkQz6evBBzb7aNEP8YDbwSBHEmUAVRlBC48+nDJc/JKAJ8aHhCvp6YRrEz8eRgmBy/eLHXtW/vwDgAYNfxpOsJQTDVk4ZIlVStJ8MqTI4qVQhNlBGeTnZNyJM547ZrNN+f0q7xRvLkyRMEMZeoUe1k1hbHIKETpJWau4dHCiI/HbtHCLBhcde5XgaT2aJtU11feO05NZLnhUg+X6IQmii1HGTXqJOhCpG8Y9foBSvGa8uIGbCiYxB58hGaDEUQxJkgb3E5+CfK5wZlmAhUK+XwcGFFqalE2LK4ss6qhUPDwatRDSSL/f2pcuUNq9iuUWfATqd8QpBdE1I8efFkUBh4LS5lIF57I3n1Z1hniIRmXma4kiCRJ4hZhnOO0WmmK07nHMO0pO0gLJKg4loCl8gr0bjFecl75ZSnBsPkOFIikj/hJ/JTRfKWO7sGsDsG0+PF
50tUtAwceFWya8TTSL2M5Itr2IjPUObN626RB2zLhuwagiBcPLx7EJd/+ZEZ1YF/fN8wLv3iIxgZL7ZADJPLiFQsIDJVJK968uq6p4bF8aNnj+DCzz/k8uoFqsj3nkq7zvUy6CPyU5U2MDzZNQBgmm67xs7eCb5GYAql4skLa6fOE8mrgi3K/TY7td2F6KulhlvrI2itP3NlgecCqidPELPMibE0MnkLo5M5NE2zbnj/qTRypn1Om2dpuLxlSZEXkfxUnnzGk/miMTvzxbI4nnTKFezqT8pyugLVK09NUcd+wM+TL6HOpiLe6n1M7h54LRXFA6XWZhWRuS5//3onu0b3pEsCwOYVLfjVbVdi3eIG13XVh6Qff+RSNMarW+QpkieIWcYvTXAqso4oeUvsAnYk3+CI/Fh6mp58zr63OE8IlWlxNMcjzrWKRVwV30xAFC8skIHUzCJ5dRZrUApl3pnVWorgsgaqXWNfX+TJe/13AGCMYWN3oQyw+ExVz39Zax2aSOQJglCRszpnMENViJK3Pg13Mk+EWJ2atL30IMtCIDz55W32AhuNMUfkecGmEEvRqagi75cHDwAdzvqkAz4Tq0pF8mrGjCryhsXlZCjD5L4dncpUKZRR1ZOPiki+9GpQ6nVnWAeu4qmxX4cg5h+/DJLe0UkcO2lnquzqT0qx5pzjuYMj0kNWBa7/VBoHHd+8XkbyoiLl9LJrVgiRj9vnm4r1o0byfafSODIy4fLkg0S+uS4CXWMYSM0shdJdEkHx5JVIPjeNSH46k6GkJx9xz3gNegoA/D35WoA8eYKYZUQ0rPriV/7vRwEAh7/8Rtz09SfQ1RTDM5+5Bg/sOIFbf7IV7Y4Pr0byn//lLvSOpgEUJvWINUK1qQZeHatlmbNUXkNU2DWFDBY1w+aKL/8OAPDL/35l4Rp5f7spEtLQGAth1GdguZSdrub7Zz158sLmMSzuO9tVZcqyBorvLidD6cX7vIS04uyaWoAieYKYZfzSBL2IGjK7j9tlBoadrBrV0hhL52XlSCFWo84TgDmFpSHtGiHyMft8i3NkHREddCJxy/K3UQRezYuGtECfeqqBV7/7qJG8aXHX04QfgStD6e7CYoBS1sCTLhl0XY3ZXn0tQSJPELNMqYFXw5WHbhVlsKiRfN60pFgLiyXlVKScKgMlnTcR1hm6m+IACgOvhsnlk4aYsXpstDDhSexTV0sSGSqCiK4FZpxM267Ju0VePS/oCUIQuMarT5pkomgyVOlIvtaieIBEniBmnVIDr6rwHxiakKLtPRcAciaX9VqEWBWOKx3JZ/ImYmFdDpKKSN7kBZEfcp4edvYVipaJNtcpwl4Xdc/4tO0af5EvVVPePfCqePLcK/KlB6yDxiNktK5E8qLImIjywyXsGp1EniCI6SCEev/gOB7YcTxQwHr6x5DKekWe49DwBB7sOYG8Ycmott4j8uoTwYmxDO7Z6i4klsmbiId1dDqrHSWiITtX3iqI/Mh4FqbF0dM/Js8Tnr8aydd5I/kSds0vtvb6ljsAglMoTYu7bJ6gAV9BUGaRt3QwULBeCnZNiewajdXcoCtAIk8Qs46IWL/12AF8/MdbXZaMKm5HT04W2TWGZeHfnjqET/98uyuqj4Q0xMMF4VX3/fT5o/jU3dtdM2zTORPxiI62+iguWdmKC5Y1I6RpdiRvFtZX7R2dxJGTBbtGjA2oIq/eF7DtkItXtEDXWNFkqv/3+4P4D0/lysLvFuzJq2MM6RKzbIESBcqU5f8+dd3ZWKssehLyyZP3csHyZrx6TXvJe1cjlF1DELOMd+BQTVVUxS1nWDJyVrdN5ExkDMsl5GGdoaMhiqOOIKt2zaAzKWkwlZEzbNN5E7GQDl1juPtjlwOw879NJZIH7FWg1ElPQ6lika/3sWv+5MpV+JMrV2HbsVN4yzefcu33K0EMuO2aXDmR/FQFykIabrtmLW67Zm3RvlIi/9YLl+KtFy4tee9qhCJ5gphl
vGVyR5RURdWLzhmWzHCR51r2iks5w3IJYUjT0NlQKHegdgBCmIeUa6XzFmIRtzjrjEmRX9uZQEhj6OkfQ0Zpk/DpVXvIz64R+EXVQQOnbrumcE9rhp58cCQfnCbJmO23V3vZ4NNh4f3GBDHHeGetjowXRF4VwImcWbS4dt60ZImDCSXCDjmRvMBwRfJZ10/AzpOPh91/3rpmi3zWsJCIhbCmM4GdfUmkcyZanCcA0VGoFo1fJC/wi4yDIvGg7BrDs6yhsGuCxkCn8uSD1mS1SwcvPMkju4YgZhlv5otaWTKrCODxsbTvuUIkVdsjrDNXJJ/zieR/sbUXg6kMmusieP7wSbx+XYfr2kLkc4aFiK7hrCUJPLZnCB0NUSxqjGF0Mo9hp0MqFclH9dIiHxSJB5U18GbXiN8/GtJ9O4zA7BrpyfvXfw9rrOTAa61CIk8Qs0xRJO+ya4rF2XuuiGTVyFfXNHQ2xuR7YX1YzmLdAPDEvmE8sW8YFy5vBgDcdG6X69q6xmA6k6EaYyGsaK3D8HgW8YiG1e0JvHIiVYjkFavnE9esRdawc/of2zPkiuT9BDdo4FS1a3KqXWPBV+RjYc1X5IOi8ctXt+EdFy9FV3PMd/8fX7EKl5/V5ruvlll4zy4EMccYnolKw2ok7yrlWzxAaZgW0j6edlhz2zV5wxbF0clc0XqoY5N5vPG8Lrxz8zLXdl1jMoUyGtJlrfWR8Zy8tsyuUeyajoYo/u97L0SLU3ddFXk/jzvQrikRyau/gxgIjgasyBRUoGxlez2+8s7zAzuBT79hHa6oweyZqSCRJ4hZRgiwwO3JO2u0agxJn3rtOZP72h0hXXOLvOWe0KTSP5b2naykMwbD4sgZJqIhDQnHa5/MmaiP6HLCVEhjrklDQjT9ZpT6RvIBIh9c1sByTaKSdk3YX57CtVYmco6hT4sgZhlvyYGRiYIQixmsDbEQxp1Ivl6xRgzFrlEJ6QwdieLsGr/FtDN5y3eykiYiedNCJKS5vPZYRJcdQySkyVWWNFaoFVPIXim0128GaZAn77WxBKbltqYKnnzAAOsC9NXLgUSeIGaZUtk141lbwBrjYSlsYvk5ca6vD61paKkvHCesDz9f375+8XCb8OTFwKuaNRMP67JjiIQ0pf66GrUXlw3wi6qDRD6oro1du8aSncnkado1hD8k8gQxy3jz5JPKZCiRMSOsEQCuYl9qdo1KSGdY0hzH/7x5I16/rkN2JMLy+fnHLsfNF3TL4/0ieVd2TUhzFR6Lh3W0JRzPXdd8Z4j6lfIVOeuqaxNk1+RLijxHq9OJHRmxJ3yJ915HiOyamUGfFkHMMt4Zr2q+uxT5aEGEmxVBznomQQmEmH7w8pVY3lon0zTF9c5f2ow/fc1qeXyQJ+8SeSVNMh4pFDOLhDRlhmhBYcM+pXz9hF8sPejFDKicKVIou5rsrJhd/UloDFjUWGiPQGNT19In3My5yDPGbmCM7WGM7WeM3T7X9yOI+cYbyav57uPO64QSybfUh4v2e1Gn8od1TUbyEzm7pLC3aJhfKWAZyUtPvmCHqBUr7UheVG9UBb3YrmGMIaJrrog/2JP3j+QtJ5JviodRF7Fz49sSUXkfl2W0ACczlcucfmKMMR3ANwHcCGADgPcyxjbM5T0JYr7xevLqoOJ41kBE1xBTUhSb4gWvPemzuDbg9qFDuiY7komsISNyNXoPsmsMy15DNaJrrvLF8bCOzgY7kk7nTd9MGhnJe4Q2pDNXdJ/Om+A+JYdLe/IcGiukiXYkorIiZNQ1BkBR/EyZ627xEgD7OecHOec5AHcBuHmO70kQ80qpWu8TWQPRkOYST7GwNoCiqpQCNZqN6Aw50wLnHBNZU3rrDbEQRKXcxpj/wKuIsiMhTebJA3YkL2bUjqXzslNROxc/a0a0TW2fafkvxi3KI3trtosCZSGtMKu3szHqO/hbi/Xe55q5FvklAI4p73udbRLG2C2MsS2MsS1DQ0Nz3ByC
mHuCUgUBO5KPhnWXUKpRd9JnghTgzkcXloVp2YuKCNtF0xgaHOH2TaFkzJWeqE54iisiP5kzfas2+tVrt48pLvzlN/gqnmhizrGiQzI5h2FyaBqTTxOdDVHfgmMLsfZMucz1J+bX7bq6eM75HZzzzZzzzR0dHT6HE0R14bcQtbA6JrL2RCRhQYQ05rJNgiL5kF4sdHmTY1yxa4CCF+/nyYc0JnPwIyENmsZkBxGPuCdbiTEAv85lqkgecNfoEQiLSVhVEaWzspxIXto1DVEZtbs9eYrkZ8pci3wvAHVu9VIA/XN8T4KYV/ysCjHxyI7kCyIfC+sufz6Z9o/k/bJc8pZlz1aNqv6+PXjpF/FqWiGSFwIr2hUL667aOH5RtJ9PL471bisZyQuRDxVE3rDsSF6IfGdDLGCGLUXyM2WuP7EXAKxljK1ijEUAvAfA/XN8T4KYNzjnvotsi1mt41kD0VDBrol6VnwKyjH3ZtcAwL6BcXvgVcl3b4yFA9df1RmTE43E/UUHEQ/rricK30i+lCdfwq7JGRaeOziCfYMp+3cOF1ZwApxI3vJ48mokHyo8QQTVkieCmdMqlJxzgzH2ZwB+C0AH8H3Oec9c3pMg5hPT4uAc6G6KoX+ssNapGOScyBpY2hKXAhcNaVjcZAvb4sYYTgSsjxp2ZdfYr9/+7acBAOu7GuW+pS3xwDGBkM5k8S8p8k4HoVadXN1e72uViLRGrxXUnogWjQGopRn+/dkj+Pwvd8n3sZA7krecAmU6Y1jdYS/Zt7K9HsMTOTTEQvKpJx7WKYXyNJjzUsOc8wcAPDDX9yGISkBYEu+9ZDmuWteJN3/jSQCFSH4yZ3vyMpIP67h4RSue/OvX42/u3ekr8rrG5ILUQPHgo5rv/rk/2Ii8z2QqwB54nfTYNWokDwBb/+46REManj04UnSvm87twuaVLUVPCnd84GJM5Ew8tGtAblMj+ZeOjrqOD3usIBHJ6xrDxSta8MRfvR7LWutwVkcCb9iwCJ+6e7v8PYNqyRPBULdIELOImO0aj+hY2hKX29ViYF67BgCWttQF5oB70wa9loVqsySiIVeNG+91RK561BF11ZMH7FIC9dGQEsm7Uyi7muLw0lwXcZVpANwTonb1J7FuUUPR7yM6EMPx5MX2Za11AOxOoLMxJme4xsP+Yw1EaegTI4hZRGSQhJX6L4B7CT1vJC8IEjCv+BdH8tN7INeUpwERyYsOwlvxUZYXnqaoeiNsUdpgPGvg4PCEawETcU1xD8vJrgnKgRfXjoV1yq45DUjkCWIWeGT3ANI5U/rhId29nqgrkg/rLk9eoApYLKxu92SzeDJMvGuwBqEKsehk6iI64mHdZQcBysSnaYq85jl/Mmfg1zuOo6dvDABw7tLCuEEha8b+KRYNCRJ5jTFozB6wpeJkM4eW/yOIMjk8PIEP37kFbzqvC7ffeA6A4txxVYhjaiSvrrKkLM6RiIaRyWeda7nFLxJyv1fz5Euhiqi477rFDdg/NF50bNhnxut0rw0A977Uh6cPjOCKNfZyexu7m/C2i5bggR3HZXtDugbGCmUNgkRe1+xj13Ym5CxYYvrQJ0YQZSKKiv1+75DMkQ/rDLrGZJlcNZKvj4ZcefICIajtiag7wtdKR/LqwGspNJ9I/iOvWY17P35F0bF+JQVKoSuRfHdTDE8fsAdun9o/gvZEFJ0NUXz1nefjlc/fiPWLbX8+b1qyMqbpZNf4XltjCGsM//iO8/Glt507rfYQBUjkCaJMRFGxVMaQs11lnrlesEUE9dHigVegIKidjdFAG0c9Tl5vmp68y66ZQrz9Sg2XQu1ANnQ3ufZt7G4EY4UMIbF/38A4NGchE9Pi0APupTFGqZNlQJ8cQZTJmFI5MuVE9UKIvTNLxeuIbou+uvqRFPmGqJLHXpxR4l0o3G8xbT9cA69TnOO3aMh02bSk0fV+Y3ej735R7dI0pxHJ04DraUMiTxBloi7ILQYavZ626skn
oiElu6a4bEBHgx3JM+ZklHi86tFJd32bSZ81Yf1Q9XpKkZczXmcuERudSF3MXt20xB3ZL2kupGFyDnz3yUMwnBmvvu1mjMoZlAENvBJEmaiR/MHhCQCFCDjsG8n72zWaFPkYIqFxhHV7gNZrVbxmTTtWd9TjE9esxTd+tx+Xr26bVjvVQcupRV6UFJh+BH356ja8/eKluGRlK161sgV/+prVuOPxg7jM0z7GGN5/6XIsa63Djr4x/Orl40XtU7liTbtvwTViepDIE0SZqEXFxpwoW9odjljWuzz5kLRx3MXJ7HOFXRNxRN5rVbTUR/C7v7gKAHDzBa7K3SVxRfJTefKy/dOPoH96y2Xy9c8/9moAwPUbF/se+4W3FgZQj596CluPnkJQk95+8VK8/eKl024H4YaegQiiTNRIXrwWIiqKa6kLdNRHQ9KmUSP5kxM5AEBbfcRZUs/OtZ+tqfy6z2SoIEIzzK4ph3Oc2jveXH1idiCRJ4gySWbychHqU2kRyburOCYUT74+ok6GKmw/5TwFNNdFnAhec2bOzs6faSEtkk25GLbfylBzhUipPDIyMef3WoiQyBNEmYyl82hPRBHWGU5N2tG4d2WleNidXRP18eSzTvpla30EkZCdVRPRZy+zRPQV05nFOtMUynIQVTSPj/lX4CTKgzx5giiTZDqPxngI9dEQBlP2LFUxq1OIvJpFk4iG0J6I4qOvW41r1nfK7f/87gvwsxeOYW1nAu/avBSXrW5FQpk4VS4iep9OymVTPIyPX3UWrl2/aFbuXYoLl7fgo69bjXdvXjb1wcSMIZEniDJJZgwsboqhPhJC36k0gMJC2rKsrlrHJqpD0xg+c+N613VWtdfLsgivWTv7S2EKT346Is8Yw1/dcM6st8EP3eezIGYPsmsIokzG0nk0xsKuWa0i5S+kF9sy052hOtsIC0YdByBqHxJ5giiTZDqPpnhYWjTqGqsiglejZ7XC5JlkJnYNUTvQt00QZZA1TGQNCw2xkJzVqi6F57cg9nylCkq7hurALCjo2yaIMsjkxEpQIWnDqMvjeSP6+UT36XCI2oe+bYIog6xZWBhb2DVqJB/W7XLDlVBFcSYDr0TtQN82QZRBzlk0O6prcuC1MV4YWBX1ZyoBXQ68VkZ7iDMDfdsEUQZZR+QjIU2ul6oW0wppWkVYNcDMl/QjagP6tgmiDHKKyNf5ePKN8RCa6yLz0jYvOmXXLEhoMhRBlIEUeV3zza657eq1+MBlKwAAT/zV6+fVKtHIk1+QkMgTRBnkzEIkX+9j17TUR9BSb0fyy1rrznwDFUJk1yxI6NsmiDKQA6+hwsBrU4UucEGToRYm9G0TRBnk/AZeY5X5gEwplAsT+rYJogzU7JqlLXXQGLCyvX6eW+UPDbwuTCoz5CCIKkF48tGQhjWdDdj2uetd2TWVhMyTJ09+QUHfNkGUQSG7xpkIVaECD1Akv1Chb5sgykD15CsdkUJJpYYXFpX/P5MgKpicUahdU+mEKJJfkNC3TRBloObJVzpk1yxM6NsmiDLI5gszXisdjSZDLUjo2yaIMhCRvFjLtZIhu2ZhQt82QZRBzrAQCWnzttrTTKDaNQsT+rYJ4jT5t6cO4cn9w1WTd06e/MKEJkMRxGny1Yf2IpUx0J6ojFLCU7FucQOu27AI5y5pmu+mEGcQEnmCOA0si2M8awConoHMpngY3/ng5vluBnGGKet/J2PsnYyxHsaYxRjb7Nn3GcbYfsbYHsbYG8prJkFUFqmMAc7t12R/EJVMuZH8TgBvA/D/1I2MsQ0A3gNgI4BuAA8zxs7mnJtl3o8gKoJkJi9fk8gTlUxZ/zs557s553t8dt0M4C7OeZZzfgjAfgCXlHMvgjjT7B8cxwe+9xwmc0bRvrE0iTxRHczV/84lAI4p73udbUUwxm5hjG1hjG0ZGhqao+YQxMx56egontg3jGMn00X7kqrIV4knTyxMprRrGGMPA1jss+uznPP7gk7z
2cb9DuSc3wHgDgDYvHmz7zEEMR+IWvHpfLHLqEbyIY1EnqhcphR5zvm1p3HdXgDLlPdLAfSfxnUIYt6QIp8rFnnVk88aNNREVC5zFYLcD+A9jLEoY2wVgLUAnp+jexHEnJBxIvjMFJG8X6RPEJVCuSmUb2WM9QK4HMCvGGO/BQDOeQ+AuwHsAvAbALdSZg1RbYhIPpM38flf7sL6v/uN3JdMFwZjM06RMoKoRMpKoeSc3wvg3oB9XwDwhXKuTxDzibBh0nkT33vykNwWDekUyRNVA40YEUQAoozwpOLJD4/nALg9eT87hyAqBRJ5gghARPIHhybktsFkBoDtyXc0RO3jyK4hKhgSeYIIQIj3i0dOym1DqSwAO09+WUscQKGmPEFUIiTyBBGAGHjd3jsmtw06Ij8ykUN3sy3yV63rOPONI4hpQlUoCSIANf89FtaQNSwMpbIYzxo4MjKJt1+0FE/dvh5t9dVRaphYmJDIE0QAIpIHgNa6CHKmhcFUFruPJwEAm5Y0YokTzRNEpUIiTxABqFkzjfEwGGMYSmWws8+2bzZ20+IbROVDIk8QAaiRfGM8jHhYx1Aqi57+JNoTUXQ62TUEUcmQyBNEAGpqZGMsjKZ4GPsHx6Fp41i3OFEVi3cTBGXXEEQA6sBrU9wW+bF0HmOTebTU0WArUR1QJE8QAag1aRrjITTFwxjPGgjpDI3x8Dy2jCCmD0XyBBGAN5JvjNsx0anJPJpI5IkqgUSeWLA8e3AEj+8NXo3MNfDqePLqe4KoBkjkiQXL1x/Zh8/d3xO4XxX5pnjYJewiqieISodEnliwpPMmDg1PYDxbvFB33rRgWoXVKBvjYTTVFUSe7BqiWiCRJxYsIkVSzGB17TPcRceKInmya4gqgUSeWLCIgdWevrHifZ4a8XUR3RW9UyRPVAtkLBILFhGt7+xPgnOOz9yzA0dGJvG3b1pflAcfCWkuH55SKIlqgUSeWLCIPPie/iSyhoW7XjgGAHhszxBuOrcLAHDb1WuQzBhY05EAY0BIYzAsTpE8UTWQyBMLFmHX7BtIYXQyJ7cPJjOyONmG7kbcsKlL7muKhzEykUNDjP50iOqAPHliwZI1LCxvrYNhcbx09JTcPpjKSisnGtJd5zTGw6iL6Ajr9KdDVAf0P5VYkHDOkTMsXLyiBQDw/CH3En9i4DUacv+JNMbDZNUQVQU9cxILEhGpr+lMoCEakiLfWh/BYCqLjIjkw26Rb46HizJvCKKSIZEnFiQiRz4W1tHVHMPhkQkAwIq2Ouw+nsR4xp4glYi6o/ZPXLsWk1kSeaJ6ILuGWJCIQddoSENTPIzJnP1+VXs9MnkL/afSAIrLF1y0vAVXrm0/s40liDIgkScWJMKuiYV11+zVVW31AID9g+MAaNITUf2QyBM1R960YCl1Z/zI5N2RvGBFuy3y+wZTCGkM8bDuez5BVAsk8kRNwTnH67/yGO585nDJ4wopkppr9urq9kIk3+Qs3k0Q1QyJPFFTpLIGekfT0m4JQnryYV2KvK4xLG2JAwCSGYNKFxA1AYk8UVMMJrMAbJEuhciuiYY0NDqzV0URsoiTG08iT9QCJPJETTGUskV+LJ0veZw68Co8+fpICIwxdCSiACDFnyCqGRJ5hcmcgSNOvnStkMmbODBU2rqYb6b63PcPjiPnqe8exGAqAwBIpvPYN5CCYVp45UQSOcPC43uH8MjuATyyewC7nBryqidfH7UHWTsbbZGnzBqiFqBQReH7Tx7CHY8fxPbPXV8zA24/eOow/vnhvdj+uesRq9BMkR88dRjffuwAtn/ueuia+3MfS+dx4788jr//g414/6UrpryWiOQPDo3jhn95An/06pX43pOH8IeXLcePnj1adLyaXVMftf8cOhucSJ5EnqgBKJJX6B/LIJkx5MSYWmDr0VFkDWtK+2I+OT6WxnjWwMmJnO++vMlx9OTktK4lRD6ZMWBaHC8dHQUAPL1/BABw78dfjfddulwe
H1Xy5OsidifYIUSeVn8iagASeYWkI4TJTOUK4kzZ1W/bEskKFvlk2h4kFVaLihDtIWdAdSoGU+7jjp60Z64eGplAS10YFy5vwcbuRrk/FtLk2q0JGcnHAJBdQ9QGJPIKItqt5Kh3JoxO5NDnTM+v5N9JtM0r0EAhW8Zvnx/ejmJ43D6P84J4i5+AiORFdo3XriE3k6h+akbk86aFe1/qxdhkHr/Zebxo/9ajo67c6V+9fBzjWXeanUi7E5FlqXv950t9JWdVPn1gGMemaTEEYVkc//lSHwwzeNBxy+GTrt/LsjjufakXWcNET39hgeodfWN45sAIdvaNYWffGJ47OFI02JnOmfjuEwfxzUf3456tvdNqY9Yw8f0nD+FHzx4B56VnmQL2557yPCmJJycRtW87dgqvnLDbLsR9SBH5sXQeD+wo/o69x3kRNowQccD25BPREDRWGHgVx1EkT9QCNROq/OtjB/DVh/bikpWteP7wSTz3N9dgUWMhYvv03dtxVmcC3/ngZvSOTuLWn2zF375xPT7ymtXymOQ0I/nf7xnCJ3+2DUta4njVytai/abF8b7vPIeWujBe+h/Xn/bvtOXIKD75s21ojIdw9TmLfI95x78+AwA4/OU3AgBeOHwSf/6z7eDcHf3+w3/tcp3X2RDFlWva8bV3X1D4vfYO4n/9ard8/7qzO9CWiKIUT+8fwf/8pX3ti1e0YH1XY+CxB4bGcetPtuJvbjoHt7z2LLldfN5CoN/yzafk7yQiczVC//FzR/CPv9mDxz59FVY6M1QBu4PrHU2joyHqK/ZC3EX2DGAv58cYw+aVrdjY3QQAWN/ViM6GKM5Z3FDydyeIaqBmIvntvWMAgIPDdlR7YqwgCpxzHB/LYCCZce3b2Tfmuob05KcQ+eOe63g55LRhdLI8i+T4WNq5j3906hc573B+p519SfT0J6XP7GVoPIsTSXf7xe/zD3+wEQAwMA0fXL2G93pediptU5GefDJTFOULsR6dzMs0Snmdfvf3d2hkApM5E5etbvO9f4cj7m31BZEXWVR3f/Ry/OFldvZOd3Mcz3/2WqzpJJEnqp+yRJ4x9n8YY68wxl5mjN3LGGtW9n2GMbafMbaHMfaGsls6BUNOpDc8bmdoqFHseNZAOm9KwRD7dip2BudcRpRTDbwOJUV06S+CQsS86YAzpdBef/FUbaVBp03CotnZP4ae/jFcsqr4SQNAUaRv3ycLXWPY4AxMBt3XdY7SEUw1OBokztKuGc9i9/FUUZsEwl+Xv6OnsxDbX31WgMg7TyWRUM3ENgQxJeX+b38IwCbO+XkA9gL4DAAwxjYAeA+AjQBuAPAtxticJWlzznFsNO3apj6uDym+rmVxKYgHh8YxmbOFcjJnwnA89qnsmqHxYp9YpccRsXIXe/Z2Sl5UERbCKe695fBJHByawPlLm2VqYND11fftiQgWOQOT0xnsHBrPyEqN4nMJQojwoeEJTDjjIZm8KSP0wWRWth+w/f6hVBb1TvsHU1kkM3kcGZl0/a7y+n1jiOgaNjtL+nnpVOw7glgolKVCnPMHlbfPAniH8/pmAHdxzrMADjHG9gO4BMAz5dwviBPJTFGO9WAqg2cPjuDQ8ARWOb6tYXGMTuakGFkceGLfMB7sGcB4tiDsybSBB3tOIG9ypDJ5LG+rwzMH7GtFQhpecaLNrUdG8bWH9uK1a9tx5zNHsHlFC5riYXzniUMA7M7CMC2EfBZ93ukMhJ7VWY97tvbhug2LcPMFS8A5x1cf3Ive0UkcdsTs5d5T+NTd25AzLKxur8enrl+HR3YP4KFdA/J6H/vRVvz4I5di/+A4uppiOO5YL5uWNKIxFvbN/R9L5/G/frkLIxM5fOKatRhMZdHZEJOe9YM9A0jnTHzo1Std51kWx9d/tw9vuWAJBpNZLGuN48RYBk8fGAbnHLe+fk3RZLJv/G4fnj4wItv2vu88i398x/n4zhMH5TGvnEhhRPkeh1JZDCYz
2NDdiBcOj+KLD+yWE7q6mmLo6U/K7/i9lyxHT38SZy9OBI4jqAOuBLFQmM2B1z8B8DPn9RLYoi/odbYVwRi7BcAtALB8+XK/Q6ZkOJXD6vZ6HBwuZIsMprL41mMH8MKhk/jS286V24fGsxhMZhEJacgZFr724F7sGXBbBGPpPL764F5M5AyMjOewvLUOewZSRQN6zx8+iecPn8Rvd57AnoEUHtp1Au2OwKxb1IA9A7ZoLfKJIH/w1GH8Ymsvzl6UwN6BcezsG8PNFyzBiWQG33h0v+vYnX1J7OxLoqMhil++fBx/ePkK/NPDe6VdccGyZmw7dgp/e+9OWBz46GtX4z+29iKsa9i8ohVN8TBOJDPYtKQRsZCOLUdG5bW/+6TdIS1vrcNgKovuphhiYR0NsRAe3j2AR/cM4t2vWuaaLXtweAL//PA+ZPKW7BhMi+Op/SN4av8I3nrRUixpjsvjk5k8vvLgXgDA371pA774wG5s7x3DbT99SX72129YhAND4+CwOyYxpjCRM/G6szvs79npnC9a3oxr1i/C//ntHnzuvh4cPTmJd21ehp7+Mbxh42I0x8N443ldyOZNPLx7EK8+q81lQwH2uINILyWIWmZKkWeMPQxgsc+uz3LO73OO+SwAA8CPxWk+x/vm13HO7wBwBwBs3rx56hw8H85d2oTfffoq3PLDLXjQiW4Hk1n09I0hnTfxnLNIs9g+mMpi3aIG9I5OFgk8YD8F7B8ah+nYN+KYb73/Ivz3n7xUNMAo9mfyFnpH0/jLN6zDms4EPvrvL2IwmfUVeWE17B2wB2kPj0wilcmjx+MzC5Y0x/G1d52Pd9/xLLYdPYW9Jwppkz/88CX4+I+24sn9wwCAazcswh9dsUruF/nef37t2VjVXo+rv/p7n/YkMZTK4oJldoZJR0MUKWfW6J4TKZy/rLmo7T39YxhKZbG6vR6mxXFgyO5ke/rGXCIvJmT92x+/Clet68R1GxZh4+d+6/rs33/ZCinmO/vG8Kb/+yQe2zMEANi8shV/dvVaV3u3HLa/U3GNJ/cPY3Qyj43djdA0hm++7yL85LmjeHj3IDZ2N+Kzb9zgOt/7dEIQtcqUnjzn/FrO+Saff0LgPwTgTQDezwvpHr0AlimXWQqgf7Yb70VNjevpH5OP/o++Mii3D6ayTvQZxaYltqCpA3V1ER3PHzopBV7AmJ1at77LP+Pixk2FfnBjd6O0BvwGLzN5E/uU3HZx7q7+JHb2j4ExO7JVWd/VKCPR/9zWh5ySO98QDWHjEntfc13YJbBAId+7syEmc8BVrtuwCNt7T2FkIisHJ2OhQuTuHSgV3rroGDoaomhXrqsOaAOFAVeRohjWNaz3pCeqOemijY/tsb83NQIXrO9qhOoI3f3CMfsezneqXpPy3YmFTLnZNTcA+GsAf8A5V2f+3A/gPYyxKGNsFYC1AJ4v517ToSNhR8wtdWHpSQO2Z99aHwFg+7xDqSw6G6NSPDYpwrCspU6WoVVZ2VaPRDSEbo+ACm6+YAkijve+sbtJCpXf4OzegZSrE3nXq+z+sKfftihWtde7ImcAWNoSR0MsjBVtdXhgxwnXPsaYFNCN3Y1FfriowdLREEUiGkI8rKOlriB8r1rZgqFUFpwDHc5Tx7AyiNrT781isUX75EQOOdNCR0MU6Vwh02eXT6ewqDHq6mA2dDe5jlHL+rbVR8AYcHwsgxVtdb41ZOqjITnWAgC/2nEcGgPWLy50COIJhgqNEQuZcj35bwCIAnjIEZZnOecf45z3MMbuBrALto1zK+d8zqt+iUh+Y3eTtC7WdCawf3Acy1vrkDMs3PH4AYxO5tGRiGLtogbn+Ea0JyIYHs9hSUscewZSaIyF7EWe42EcGBqX9U5WtNW57tlWH8HIRA4XLm/GOV0NGEhm0NEQlSsPfeXBPfie43sLxEzbsxclcHh4Elec1Y6Ohii+/rt9mMyZuGHjYixrdd9HrFi0qbsJR0YmURfRXYOp
m0SH5RFPwBY5xoC2RASMMXQ2RtHdFMczB0eKzhFPICEn/fPsRQn850t9eEGxvA6PTMixBPtzj0E8wy1rjePxfcO47msFS+jYqP07qmxa4o7OGxQhD+ka2urt78Pv9yn8zk04NDyBszrs7/isjgTiSiaRiOCp0BixkCk3u2ZNiX1fAPCFcq4/U67bsAiHRybw5vO68a+/P4B1ixpw9uIG3LetDzed24UTYxlsPToKXdPwpvO70d0cx4evXIWrz+nEuUua8F/bj+Oy1a2Ih3VcdlYbElEdDdEwDo9M4CInLe8Dl63EUCqL91yyHHdvOYY3n9eNx/YMorMhik9cs1bmfEdDOj557Vrs9fH8AeDNLXW45pxO7BlIIRLS8BfXnY3H9w2BgeFDr16Bc5c046OvW413bV6Gu54/KsvsCi/5VStb0JaIygHRVe31uO2atbj5gu6ie73j4qVY1lqHsPOk8efXno3mujCuWd+JV61sxTldDXjvJcuRMyxc6uTVf++PXoXH9gzhnK4G/HzLMdf1zl7cgA9fuQr3bO3FZM7EFWe14cJlzVjf1Yir1nXgB08dBleGYNYuShSVCb5pUxcODU3gHZuX4tc7TqA9EXHtv+2atXju4MmS3vkfX7ES5y1twrLWOty3rQ83bOpy7V/f1YiPvm619PoJYiHCplNv5EyxefNmvmXLlvluBkEQRFXBGHuRc77Zbx9N/SMIgqhhSOQJgiBqGBJ5giCIGoZEniAIooYhkScIgqhhSOQJgiBqGBJ5giCIGoZEniAIooapqMlQjLEhAEfKuEQ7gOFZas6ZpFrbDVRv26u13UD1tr1a2w1UfttXcM59p3ZXlMiXC2NsS9Csr0qmWtsNVG/bq7XdQPW2vVrbDVR328muIQiCqGFI5AmCIGqYWhP5O+a7AadJtbYbqN62V2u7gepte7W2G6jitteUJ08QBEG4qbVIniAIglAgkScIgqhhakLkGWM3MMb2MMb2M8Zun+/2TAVj7DBjbAdjbBtjbIuzrZUx9hBjbJ/zs6UC2vl9xtggY2ynsi2wnYyxzzjfwR7G2Bvmp9WyLX5t/3vGWJ/zuW9jjN2k7KuItjPGljHGHmWM7WaM9TDGPuFsr+jPvUS7q+EzjzHGnmeMbXfa/g/O9or+zKcN57yq/wHQARwAsBpABMB2ABvmu11TtPkwgHbPtn8EcLvz+nYA/7sC2vlaABcB2DlVOwFscD77KIBVzneiV1jb/x7Ap32OrZi2A+gCcJHzugHAXqd9Ff25l2h3NXzmDEDCeR0G8ByAyyr9M5/uv1qI5C8BsJ9zfpBzngNwF4Cb57lNp8PNAO50Xt8J4C3z1xQbzvnjAE56Nge182YAd3HOs5zzQwD2w/5u5oWAtgdRMW3nnB/nnG91XqcA7AawBBX+uZdodxAV0W4A4Dbjztuw84+jwj/z6VILIr8EgLrSdC9K/+eqBDiABxljLzLGbnG2LeKcHwfsPxgAnfPWutIEtbNavoc/Y4y97Ng54vG7ItvOGFsJ4ELYkWXVfO6edgNV8JkzxnTG2DYAgwAe4pxX1WdeiloQeeazrdLzQq/gnF8E4EYAtzLGXjvfDZoFquF7+DaAswBcAOA4gK862yuu7YyxBIBfAPgk5zxZ6lCfbfPWdp92V8Vnzjk3OecXAFgK4BLG2KYSh1dU26eiFkS+F8Ay5f1SAP3z1JZpwTnvd34OArgX9qPeAGOsCwCcn4Pz18KSBLWz4r8HzvmA88dsAfgOCo/YFdV2xlgYtlD+mHN+j7O54j93v3ZXy2cu4JyfAvAYgBtQBZ/5dKgFkX8BwFrG2CrGWATAewDcP89tCoQxVs8YaxCvAVwPYCfsNn/IOexDAO6bnxZOSVA77wfwHsZYlDG2CsBaAM/PQ/sCEX+wDm+F/bkDFdR2xhgD8D0AuznnX1N2VfTnHtTuKvnMOxhjzc7rOIBrAbyCCv/Mp818j/zOxj8AN8EezT8A4LPz3Z4p2roa9sj8
dgA9or0A2gA8AmCf87O1Atr6U9iP2HnY0cuHS7UTwGed72APgBsrsO3/DmAHgJdh/6F2VVrbAVwJ+9H/ZQDbnH83VfrnXqLd1fCZnwfgJaeNOwH8D2d7RX/m0/1HZQ0IgiBqmFqwawiCIIgASOQJgiBqGBJ5giCIGoZEniAIooYhkScIgqhhSOQJgiBqGBJ5giCIGub/A3XRhTfldjE+AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps\n",
    "\n",
    "\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "# Train until the running mean over the last 5 episodes exceeds 16\n",
    "# (near-perfect Pong play; the maximum score difference is 21).\n",
    "for episode in itertools.count():\n",
    "    reward_sum, step_count = play_episode(env, agent, mode='train')\n",
    "    episode_rewards.append(reward_sum)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, reward_sum, step_count)\n",
    "    if np.mean(episode_rewards[-5:]) > 16.:\n",
    "        break\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "logging.info('==== test ====')\n",
    "episode_rewards = []\n",
    "# Run 100 evaluation episodes; mode is left at its default (None),\n",
    "# i.e. without the 'train' flag used above.\n",
    "for episode in range(100):\n",
    "    reward_sum, step_count = play_episode(env, agent)\n",
    "    episode_rewards.append(reward_sum)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, reward_sum, step_count)\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
